Handwritten digit recognition with a CNN in Python and TensorFlow

  • 2020-06-19 11:00:02
  • OfStack

A CNN-based handwritten digit recognition example is shared below for your reference; the specifics are as follows.


# -*- coding: utf-8 -*-

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data set
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Start the session in interactive mode
# Without an InteractiveSession, the entire computation graph
# must be built before the session can be launched
sess = tf.InteractiveSession()

""" Construct calculation diagram """
#  Use placeholders to create nodes for the input image and target output categories 
# shape The parameter is optional, so there it is tensorflow Dimensions can be automatically captured no 1 The resulting error 
x = tf.placeholder("float", shape=[None, 784]) #  Original input 
y_ = tf.placeholder("float", shape=[None, 10]) #  The target 

# To avoid repeating the initialization over and over while building
# the model, we define two helper functions
def weight_variable(shape):
 # Truncated normal distribution; stddev is its standard deviation
 initial = tf.truncated_normal(shape=shape, stddev=0.1)
 return tf.Variable(initial)
def bias_variable(shape):
 initial = tf.constant(0.1, shape=shape)
 return tf.Variable(initial)
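# Note on these choices: the truncated normal breaks symmetry between units,
# and the small positive bias (0.1) helps keep ReLU units from starting out
# inactive ("dead")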

# Convolution and pooling helpers: the convolution uses stride 1 and 'SAME'
# padding, so its output has the same size as its input; pooling is plain
# 2x2 max pooling
def conv2d(x, W):
 return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
 return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
       strides=[1, 2, 2, 1], padding='SAME')
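
# With 'SAME' padding the convolutions preserve the 28x28 spatial size,
# and each 2x2 max pool halves it: 28x28 -> 14x14 -> 7x7. This is where
# the 7*7*64 input size of the fully connected layer below comes from.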

""" The first 1 Layer of convolution """
#  by 1 A convolution and 1 The largest pool composition. filter 5x5 The calculated 32 The feature is because of the use 32 The convolution of the filters 
#  The weight tensor of the convolution is [5, 5, 1, 32],1 Is the number of input channels, 32 Is the number of output channels 
W_conv1 = weight_variable([5, 5, 1, 32])
# One bias for each output channel
b_conv1 = bias_variable([32])

# To apply the convolution, the input must be reshaped into a 4-D tensor;
# the second and third dimensions are the image width and height, and the
# last dimension is the number of color channels (1 for grayscale images;
# RGB images have 3 channels)
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First-layer convolution, using ReLU as the activation function
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# Pooling result after the first convolutional layer
h_pool1 = max_pool_2x2(h_conv1)

""" The first 2 Layer of convolution """
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

""" The connection layer """
#  Image size reduced to 7*7 To join 1 a 1024 The full connection layer of a neuron 
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
# Reshape the output tensor of the final pooling layer into a batch of 1-D vectors
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
# Output of the fully connected layer
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

""" use Dropout Reduce overfitting """
#  use placeholder A placeholder to indicate the output of a neuron at dropout The probability of being constant 
#  Enable during training dropout , closed during the test dropout
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
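# tf.nn.dropout scales the kept activations by 1/keep_prob at training time
# (inverted dropout), so no extra rescaling is needed when testing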

""" Output layer """
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
# Predicted output of the model
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

# Cross-entropy loss
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
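# Note: taking the log of the softmax output is numerically unstable when
# y_conv contains zeros. A more stable variant (a sketch, not part of the
# original code) keeps the raw logits and lets TensorFlow combine softmax
# and cross-entropy in one op:
# logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
# cross_entropy = tf.reduce_mean(
#  tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))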

# Train the model, using AdamOptimizer to minimize the cross-entropy
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Predictions: a list of True/False values
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_conv, 1))
# Cast the booleans to floats and take the mean as the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
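# For example, predictions [3, 1] against labels [3, 7] give
# correct_prediction [True, False], which casts to [1.0, 0.0] and
# averages to an accuracy of 0.5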

# Variables must be initialized in the session before they can be used in it
sess.run(tf.global_variables_initializer())

# Iteratively optimize the model
for i in range(2000):
 # Take a batch of 50 samples for each training step
 batch = mnist.train.next_batch(50)
 if i%100 == 0:
  train_accuracy = accuracy.eval(feed_dict={
   x: batch[0], y_: batch[1], keep_prob: 1.0}) # dropout is disabled during evaluation
  print("step %d, training accuracy %g" % (i, train_accuracy))
 train_step.run(feed_dict={x:batch[0], y_:batch[1], keep_prob: 0.5})
print("test accuracy %g" % accuracy.eval(feed_dict={
   x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
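
# A quick sanity check (an illustrative sketch, not part of the original
# script): classify one test image by evaluating y_conv with dropout off
import numpy as np
img = mnist.test.images[0].reshape(1, 784) # one flattened 28x28 test image
probs = y_conv.eval(feed_dict={x: img, keep_prob: 1.0})
print("predicted digit: %d" % np.argmax(probs))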

After 2000 iterations, the recognition accuracy on the test set was 0.9772...
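
The script above targets the TensorFlow 1.x graph API (placeholders, sessions), which TensorFlow 2.x only exposes through tf.compat.v1. As a rough, untested sketch (not from the original article), the same architecture could be written with the tf.keras API, with layer sizes and hyperparameters mirroring the graph code:


import tensorflow as tf

# Load MNIST as NumPy arrays: 28x28 grayscale images with integer labels
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., None].astype("float32") / 255.0 # add channel dim, scale to [0, 1]
x_test = x_test[..., None].astype("float32") / 255.0

model = tf.keras.Sequential([
 tf.keras.layers.Conv2D(32, 5, padding="same", activation="relu",
       input_shape=(28, 28, 1)),
 tf.keras.layers.MaxPooling2D(), # 28x28 -> 14x14
 tf.keras.layers.Conv2D(64, 5, padding="same", activation="relu"),
 tf.keras.layers.MaxPooling2D(), # 14x14 -> 7x7
 tf.keras.layers.Flatten(),
 tf.keras.layers.Dense(1024, activation="relu"),
 tf.keras.layers.Dropout(0.5), # argument is the drop rate, i.e. 1 - keep_prob
 tf.keras.layers.Dense(10, activation="softmax"),
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=50, epochs=2)
print(model.evaluate(x_test, y_test))

tf.keras applies dropout only while training, so there is no keep_prob placeholder to manage by hand.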

