We built a simple model in the last article; in this one we will build a more sophisticated model with TensorFlow.
This article is mostly practice; the code comes from:
https://rstudio.github.io/tensorflow/index.html
Sys.setenv(TENSORFLOW_PYTHON = "/usr/local/bin/python") # point to the self-installed Python 2.7, not the OS X default
library(tensorflow)

### Basically we redo the simple MNIST model, except that we use an
### interactive session, which lets us interleave operations that build
### the computation graph with ones that run it.
sess <- tf$InteractiveSession()

### Load the default MNIST data.
datasets <- tf$contrib$learn$datasets
mnist <- datasets$mnist$read_data_sets("MNIST-data", one_hot = TRUE)

x <- tf$placeholder(tf$float32, shape(NULL, 784L))
y <- tf$placeholder(tf$float32, shape(NULL, 10L))

##############################################################
##              we will build a simple model                ##
##############################################################
W <- tf$Variable(tf$zeros(shape(784L, 10L)))
b <- tf$Variable(tf$zeros(shape(10L)))
sess$run(tf$global_variables_initializer())

model <- tf$nn$softmax(tf$matmul(x, W) + b)
cross_entropy <- tf$reduce_mean(-tf$reduce_sum(y * tf$log(model), reduction_indices = 1L))
optimizer <- tf$train$GradientDescentOptimizer(0.5)
train_step <- optimizer$minimize(cross_entropy)

for (i in 1:3000) {
  batches <- mnist$train$next_batch(100L)
  batch_xs <- batches[[1]]
  batch_ys <- batches[[2]]
  sess$run(train_step, feed_dict = dict(x = batch_xs, y = batch_ys))
}

correction_p <- tf$equal(tf$argmax(model, 1L), tf$argmax(y, 1L))
accuracy <- tf$reduce_mean(tf$cast(correction_p, tf$float32))
sess$run(accuracy, feed_dict = dict(x = mnist$test$images, y = mnist$test$labels))
## [1] 0.924

##############################################################
##       based on the above we will build a CNN model       ##
##############################################################
### 1. Create many weights and biases, instead of the single layer above.
weight_variable <- function(shape) {
  initial <- tf$truncated_normal(shape, stddev = 0.1)
  tf$Variable(initial)
}

bias_variable <- function(shape) {
  initial <- tf$constant(0.1, shape = shape)
  tf$Variable(initial)
}

### 2. Convolution and pooling settings.
conv2d <- function(x, W) {
  tf$nn$conv2d(x, W, strides = c(1L, 1L, 1L, 1L), padding = 'SAME')
}

max_pool_2x2 <- function(x) {
  tf$nn$max_pool(x, ksize = c(1L, 2L, 2L, 1L),
                 strides = c(1L, 2L, 2L, 1L), padding = 'SAME')
}

### 3. Set up the first layer.
W_conv1 <- weight_variable(shape(5L, 5L, 1L, 32L))
b_conv1 <- bias_variable(shape(32L))

### 4. Reshape x to be compatible with the first layer.
x_image <- tf$reshape(x, shape(-1L, 28L, 28L, 1L))

### 5. Convolve x with the first layer.
h_conv1 <- tf$nn$relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 <- max_pool_2x2(h_conv1)

### 6. Do the same for the second layer.
W_conv2 <- weight_variable(shape = shape(5L, 5L, 32L, 64L))
b_conv2 <- bias_variable(shape = shape(64L))
h_conv2 <- tf$nn$relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 <- max_pool_2x2(h_conv2)

### 7. Densely connected layer: the image size has been reduced to 7x7, so we
### add a fully-connected layer with 1024 neurons to allow processing on the
### entire image. We reshape the tensor from the pooling layer into a batch
### of vectors, multiply by a weight matrix, add a bias, and apply a ReLU.
W_fc1 <- weight_variable(shape(7L * 7L * 64L, 1024L))
b_fc1 <- bias_variable(shape(1024L))
h_pool2_flat <- tf$reshape(h_pool2, shape(-1L, 7L * 7L * 64L))
h_fc1 <- tf$nn$relu(tf$matmul(h_pool2_flat, W_fc1) + b_fc1)

### 8. Add dropout.
keep_prob <- tf$placeholder(tf$float32)
h_fc1_drop <- tf$nn$dropout(h_fc1, keep_prob)

### 9. The final softmax layer.
W_fc2 <- weight_variable(shape(1024L, 10L))
b_fc2 <- bias_variable(shape(10L))
y_conv <- tf$nn$softmax(tf$matmul(h_fc1_drop, W_fc2) + b_fc2)
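As a quick sanity check (our addition, not part of the original tutorial), we can confirm the shape arithmetic: a SAME-padded, stride-1 convolution keeps the 28x28 spatial size, and each 2x2 max-pool halves it, so two pools give 28 -> 14 -> 7, which is where the 7L * 7L * 64L flatten size in step 7 comes from. TensorFlow's static shape inference will report the same numbers:

h_pool1$get_shape() # (?, 14, 14, 32) after the first 2x2 max-pool
h_pool2$get_shape() # (?, 7, 7, 64) after the second, hence 7*7*64 inputs to the dense layer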
### 10. Set up the training arguments as in the simple example, but with AdamOptimizer.
cross_entropy <- tf$reduce_mean(-tf$reduce_sum(y * tf$log(y_conv), reduction_indices = 1L))
train_step <- tf$train$AdamOptimizer(1e-4)$minimize(cross_entropy)
correct_prediction <- tf$equal(tf$argmax(y_conv, 1L), tf$argmax(y, 1L))
accuracy <- tf$reduce_mean(tf$cast(correct_prediction, tf$float32))
sess$run(tf$global_variables_initializer())

### 11. Train with dropout (keep_prob = 0.5 during training, 1.0 when measuring accuracy).
for (i in 1:3000) {
  batch <- mnist$train$next_batch(50L)
  if (i %% 100 == 0) {
    train_accuracy <- accuracy$eval(feed_dict = dict(
      x = batch[[1]], y = batch[[2]], keep_prob = 1.0))
    cat(sprintf("step %d, training accuracy %g\n", i, train_accuracy))
  }
  train_step$run(feed_dict = dict(
    x = batch[[1]], y = batch[[2]], keep_prob = 0.5))
}
## step 100, training accuracy 0.84
## step 200, training accuracy 0.86
## step 300, training accuracy 0.88
## step 400, training accuracy 0.9
## step 500, training accuracy 1
## step 600, training accuracy 0.96
## step 700, training accuracy 0.94
## step 800, training accuracy 0.98
## step 900, training accuracy 0.96
## step 1000, training accuracy 0.96
## step 1100, training accuracy 0.98
## step 1200, training accuracy 0.96
## step 1300, training accuracy 0.98
## step 1400, training accuracy 0.98
## step 1500, training accuracy 0.98
## step 1600, training accuracy 0.98
## step 1700, training accuracy 0.96
## step 1800, training accuracy 0.98
## step 1900, training accuracy 0.92
## step 2000, training accuracy 0.94
## step 2100, training accuracy 0.98
## step 2200, training accuracy 0.98
## step 2300, training accuracy 0.96
## step 2400, training accuracy 1
## step 2500, training accuracy 1
## step 2600, training accuracy 0.98
## step 2700, training accuracy 0.96
## step 2800, training accuracy 1
## step 2900, training accuracy 1
## step 3000, training accuracy 0.98

### 12. Evaluate on the test set.
test_accuracy <- accuracy$eval(feed_dict = dict(
  x = mnist$test$images, y = mnist$test$labels, keep_prob = 1.0))
cat(sprintf("test accuracy %g", test_accuracy))
## test accuracy 0.9808
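One aside of ours (not a change the original tutorial makes): computing tf$log(y_conv) on an explicit softmax can be numerically unstable when a predicted probability underflows to zero. TensorFlow ships a fused op, tf$nn$softmax_cross_entropy_with_logits, that works on the pre-softmax scores instead; a minimal sketch of the swap, reusing the layers defined above (argument names may vary slightly across TensorFlow versions):

logits <- tf$matmul(h_fc1_drop, W_fc2) + b_fc2 # pre-softmax scores
cross_entropy <- tf$reduce_mean(
  tf$nn$softmax_cross_entropy_with_logits(labels = y, logits = logits))
train_step <- tf$train$AdamOptimizer(1e-4)$minimize(cross_entropy)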
The 0.9808 test accuracy is not yet optimal: we only trained for 3,000 iterations, and adding more should improve the final accuracy.
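A sketch of what that longer run would look like (hypothetical on our part; we have not re-run it here, though the upstream TensorFlow tutorial trains for 20,000 steps and reports roughly 99.2% test accuracy), simply continuing the loop above:

for (i in 3001:20000) {
  batch <- mnist$train$next_batch(50L)
  train_step$run(feed_dict = dict(
    x = batch[[1]], y = batch[[2]], keep_prob = 0.5))
}
accuracy$eval(feed_dict = dict(
  x = mnist$test$images, y = mnist$test$labels, keep_prob = 1.0))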