Handwritten digit recognition with a CNN in Python and TensorFlow
- 2020-06-19 11:00:02
- OfStack
The following CNN-based handwritten digit recognition code is provided for your reference:
# -*- coding: utf-8 -*-
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data set from the 'MNIST_data' directory with one-hot labels
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Start an interactive session.
# Without an interactive session, the entire computation graph would have to
# be built before the session is started and the graph is launched.
sess = tf.InteractiveSession()
""" Construct calculation diagram """
# Placeholders are the graph's entry points for the input images and the
# target output classes.
# The shape argument is optional, but supplying it lets TensorFlow
# automatically catch errors caused by inconsistent dimensions.
x = tf.placeholder(tf.float32, shape=[None, 784])   # flattened input images
y_ = tf.placeholder(tf.float32, shape=[None, 10])   # one-hot target labels
# Two small factory functions keep variable initialization out of the
# model-building code, so it is not repeated for every layer.
def weight_variable(shape):
    """Return a weight Variable of the given shape.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1))
def bias_variable(shape):
    """Return a bias Variable of the given shape, filled with the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
# Convolution and pooling helpers.
def conv2d(x, W):
    """Convolve x with the filter W using a stride of 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool x over 2x2 windows with a stride of 2 and 'SAME' padding."""
    # The pooling window and the stride use the same [1, 2, 2, 1] layout.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
""" The first 1 Layer of convolution """
# First layer: one convolution followed by one max-pooling step.
# The convolution uses 32 filters of size 5x5, so it computes 32 features.
# Its weight tensor has shape [5, 5, 1, 32]: a 5x5 patch, 1 input channel
# and 32 output channels.
W_conv1 = weight_variable([5, 5, 1, 32])
# One bias value per output channel.
b_conv1 = bias_variable([32])
# The convolution needs a 4-D input, so reshape the flat images: dimensions
# 2 and 3 are the image width and height, and the last dimension is the
# number of color channels (1 for grayscale; an RGB image would have 3).
x_image = tf.reshape(x, [-1, 28, 28, 1])
# Convolve, add the per-channel bias, then apply ReLU. The bias must be added
# here; without it b_conv1 is never used and the layer has no bias term,
# unlike the second convolution layer.
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# Pooled result of the first convolution layer.
h_pool1 = max_pool_2x2(h_conv1)
""" The first 2 Layer of convolution """
# Second layer: 5x5 filters mapping the 32 input channels to 64 output
# channels, with one bias per output channel.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
# Convolve the pooled first-layer output, add the bias and apply ReLU.
h_conv2 = tf.nn.relu(tf.add(conv2d(h_pool1, W_conv2), b_conv2))
# Pooled result of the second convolution layer.
h_pool2 = max_pool_2x2(h_conv2)
""" The connection layer """
# The image has been reduced to 7x7 (with 64 channels); attach a fully
# connected layer of 1024 neurons.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
# Flatten the output of the last pooling layer into one vector per image.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
# Output of the fully connected layer: ReLU(h_pool2_flat . W_fc1 + b_fc1).
h_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2_flat, W_fc1), b_fc1))
""" use Dropout Reduce overfitting """
# keep_prob is a placeholder for the probability that a neuron's output is
# kept during dropout. Feeding it at run time lets dropout be enabled during
# training and switched off (keep_prob = 1.0) during testing.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)
""" Output layer """
# Output layer: maps the 1024 fully connected features to 10 class scores.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
# Unnormalized class scores. They are kept separate from the softmax output
# so that the loss can be computed from them directly.
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
# Model prediction: class probabilities.
y_conv = tf.nn.softmax(logits)
# Cross-entropy loss, summed over the batch. It is computed from the logits
# with the fused op instead of -sum(y_ * log(softmax(logits))): when the
# softmax saturates, a probability underflows to 0, log(0) is -inf, and the
# loss and its gradients become NaN.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Training step: minimize the loss with the Adam optimizer (learning rate 1e-4).
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Compare the index of the largest target entry with the index of the largest
# predicted entry; the result is a tensor of booleans, one per sample.
true_class = tf.argmax(y_, 1)
predicted_class = tf.argmax(y_conv, 1)
correct_prediction = tf.equal(true_class, predicted_class)
# Cast the booleans to floats; their mean is the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Initialize all variables inside the session before training starts.
sess.run(tf.global_variables_initializer())
# Iteratively optimize the model: 2000 steps, 50 training samples per step.
for i in range(2000):
    batch_images, batch_labels = mnist.train.next_batch(50)
    if not i % 100:
        # Every 100 steps, report the accuracy on the current batch.
        # Dropout is disabled for this measurement (keep_prob = 1.0).
        train_accuracy = sess.run(accuracy, feed_dict={
            x: batch_images, y_: batch_labels, keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # One optimization step, with dropout enabled (keep_prob = 0.5).
    sess.run(train_step, feed_dict={
        x: batch_images, y_: batch_labels, keep_prob: 0.5})
# Final evaluation on the whole test set, with dropout disabled.
test_accuracy = sess.run(accuracy, feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
print("test accuracy %g" % test_accuracy)
After 2000 iterations, the recognition accuracy on the test set was 0.9772...