A CNN Model with the TensorFlow API in R

We built a simple model in the last article; this time we will build a more sophisticated model with TensorFlow.

This article is mostly practice, and the code comes from:

https://rstudio.github.io/tensorflow/index.html

Sys.setenv(TENSORFLOW_PYTHON="/usr/local/bin/python")
# point to python 2.7 (self-installed, not the default one of OSX)
library(tensorflow)
### basically we do the simple MNIST example again, except that we use an interactive session
sess <- tf$InteractiveSession()

###load default mnist data
datasets <- tf$contrib$learn$datasets
mnist <- datasets$mnist$read_data_sets("MNIST-data", one_hot = TRUE)
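
### quick check (optional): read_data_sets keeps 55,000 images for training,
### 5,000 for validation, and 10,000 for testing
mnist$train$num_examples        # 55000
mnist$validation$num_examples   # 5000
mnist$test$num_examples         # 10000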

### the interactive session lets us interleave operations that build
### the computation graph with operations that run it

x <- tf$placeholder(tf$float32, shape(NULL, 784L))
y <- tf$placeholder(tf$float32, shape(NULL, 10L))

##############################################################
##               we will build a simple model               ##
##############################################################

W <- tf$Variable(tf$zeros(shape(784L, 10L)))
b <- tf$Variable(tf$zeros(shape(10L)))

sess$run(tf$global_variables_initializer())

model <- tf$nn$softmax(tf$matmul(x, W) + b)

cross_entropy <- tf$reduce_mean(-tf$reduce_sum(y * tf$log(model), reduction_indices = 1L))
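
### aside (a sketch, not used for the training below): the hand-written
### -sum(y * log(model)) can produce NaN if a predicted probability
### underflows to 0; TensorFlow also provides
### tf$nn$softmax_cross_entropy_with_logits, which folds the softmax and the
### log together in a numerically stable way and takes the raw logits rather
### than the softmax output
logits <- tf$matmul(x, W) + b
cross_entropy_stable <- tf$reduce_mean(
  tf$nn$softmax_cross_entropy_with_logits(labels = y, logits = logits))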

optimizer <- tf$train$GradientDescentOptimizer(0.5)
train_step <- optimizer$minimize(cross_entropy)

for (i in 1:3000) {
  batches <- mnist$train$next_batch(100L)
  batch_xs <- batches[[1]]
  batch_ys <- batches[[2]]
  sess$run(train_step,
           feed_dict = dict(x = batch_xs, y = batch_ys))
}

correct_prediction <- tf$equal(tf$argmax(model, 1L), tf$argmax(y, 1L))
accuracy <- tf$reduce_mean(tf$cast(correct_prediction, tf$float32))
sess$run(accuracy, feed_dict = dict(x = mnist$test$images, y = mnist$test$labels))
## [1] 0.924
##############################################################
##          based above we will build a CNN model           ##
##############################################################

### 1. Create helpers for weights and biases, instead of defining only one layer as above.
weight_variable <- function(shape) {
  initial <- tf$truncated_normal(shape, stddev=0.1)
  tf$Variable(initial)
}

bias_variable <- function(shape) {
  initial <- tf$constant(0.1, shape=shape)
  tf$Variable(initial)
}

### 2. convolution and max-pooling helpers
conv2d <- function(x, W) {
  tf$nn$conv2d(x, W, strides=c(1L, 1L, 1L, 1L), padding='SAME')
}

max_pool_2x2 <- function(x) {
  tf$nn$max_pool(
    x,
    ksize=c(1L, 2L, 2L, 1L),
    strides=c(1L, 2L, 2L, 1L),
    padding='SAME')
}

###3. set up 1st layer
W_conv1 <- weight_variable(shape(5L, 5L, 1L, 32L))
b_conv1 <- bias_variable(shape(32L))

### 4. reshape x to be compatible with the 1st layer
x_image <- tf$reshape(x, shape(-1L, 28L, 28L, 1L))

### 5. convolve x with the 1st layer
h_conv1 <- tf$nn$relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 <- max_pool_2x2(h_conv1)

### 6. do the same for the 2nd layer
W_conv2 <- weight_variable(shape = shape(5L, 5L, 32L, 64L))
b_conv2 <- bias_variable(shape = shape(64L))

h_conv2 <- tf$nn$relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 <- max_pool_2x2(h_conv2)
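
### optional sanity check: the static shapes match the 28 -> 14 -> 7
### reduction described in step 7 below
h_pool1$get_shape()   # ? x 14 x 14 x 32
h_pool2$get_shape()   # ? x 7 x 7 x 64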

### 7. Densely connected layer: the image size has been reduced to 7x7, so we add
### a fully-connected layer with 1024 neurons to allow processing on the entire image.
### We reshape the tensor from the pooling layer into a batch of vectors,
### multiply by a weight matrix, add a bias, and apply a ReLU.
W_fc1 <- weight_variable(shape(7L * 7L * 64L, 1024L))
b_fc1 <- bias_variable(shape(1024L))

h_pool2_flat <- tf$reshape(h_pool2, shape(-1L, 7L * 7L * 64L))
h_fc1 <- tf$nn$relu(tf$matmul(h_pool2_flat, W_fc1) + b_fc1)

### 8. add dropout
keep_prob <- tf$placeholder(tf$float32)
h_fc1_drop <- tf$nn$dropout(h_fc1, keep_prob)

###9. the final softmax layer
W_fc2 <- weight_variable(shape(1024L, 10L))
b_fc2 <- bias_variable(shape(10L))

y_conv <- tf$nn$softmax(tf$matmul(h_fc1_drop, W_fc2) + b_fc2)

### 10. set up training as in the simple example, but with AdamOptimizer
cross_entropy <- tf$reduce_mean(-tf$reduce_sum(y * tf$log(y_conv), reduction_indices=1L))
train_step <- tf$train$AdamOptimizer(1e-4)$minimize(cross_entropy)
correct_prediction <- tf$equal(tf$argmax(y_conv, 1L), tf$argmax(y, 1L))
accuracy <- tf$reduce_mean(tf$cast(correct_prediction, tf$float32))
sess$run(tf$global_variables_initializer())

### 11. train with dropout (keep_prob = 0.5 during training, 1.0 for evaluation)
for (i in 1:3000) {
  batch <- mnist$train$next_batch(50L)
  if (i %% 100 == 0) {
    train_accuracy <- accuracy$eval(feed_dict = dict(
      x = batch[[1]], y = batch[[2]], keep_prob = 1.0))
    cat(sprintf("step %d, training accuracy %g\n", i, train_accuracy))
  }
  train_step$run(feed_dict = dict(
    x = batch[[1]], y = batch[[2]], keep_prob = 0.5))
}
## step 100, training accuracy 0.84
## step 200, training accuracy 0.86
## step 300, training accuracy 0.88
## step 400, training accuracy 0.9
## step 500, training accuracy 1
## step 600, training accuracy 0.96
## step 700, training accuracy 0.94
## step 800, training accuracy 0.98
## step 900, training accuracy 0.96
## step 1000, training accuracy 0.96
## step 1100, training accuracy 0.98
## step 1200, training accuracy 0.96
## step 1300, training accuracy 0.98
## step 1400, training accuracy 0.98
## step 1500, training accuracy 0.98
## step 1600, training accuracy 0.98
## step 1700, training accuracy 0.96
## step 1800, training accuracy 0.98
## step 1900, training accuracy 0.92
## step 2000, training accuracy 0.94
## step 2100, training accuracy 0.98
## step 2200, training accuracy 0.98
## step 2300, training accuracy 0.96
## step 2400, training accuracy 1
## step 2500, training accuracy 1
## step 2600, training accuracy 0.98
## step 2700, training accuracy 0.96
## step 2800, training accuracy 1
## step 2900, training accuracy 1
## step 3000, training accuracy 0.98
### 12. evaluate on the test set
test_accuracy <- accuracy$eval(feed_dict = dict(
  x = mnist$test$images, y = mnist$test$labels, keep_prob = 1.0))
cat(sprintf("test accuracy %g", test_accuracy))
## test accuracy 0.9808

The result is not optimal; running more training iterations should improve the final accuracy.
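
For example, because the interactive session is still open, we can simply keep training the same graph for additional steps and then re-evaluate; a minimal sketch (the extra 10,000 steps here are an arbitrary choice):

for (i in 1:10000) {
  batch <- mnist$train$next_batch(50L)
  train_step$run(feed_dict = dict(
    x = batch[[1]], y = batch[[2]], keep_prob = 0.5))
}
accuracy$eval(feed_dict = dict(
  x = mnist$test$images, y = mnist$test$labels, keep_prob = 1.0))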
