tensorflow学习,建立卷积深度学习网络(CNN),识别和破解captcha图形验证码(21)
发表于: 2018-07-17 22:36:56 | 已被阅读: 30 | 分类于: tensorflow
在python如何生成验证码小节,我们利用captcha
库做了一个图形验证码产生器,可以产生带one-hot
标签的图片数据集。本节将基于此数据集,建立一个卷积深度学习网络(CNN),并且训练之,希望可以识别破解此验证码数据集。
CNN卷积深度学习网络的结构
计划建立 5 层网络,前 3 层为卷积层,第 4、5 层为全连接层。对 4 层隐藏层都进行 dropout。于是,网络结构如下所示:
input
——>conv——>pool——>dropout
——>conv——>pool——>dropout
——>conv——>pool——>dropout
——>fully connected layer——>dropout
——>fully connected layer——>
output
基本模块的初始化
在建立 CNN卷积深度学习网络之前,首先把用到的方法再封装一下,方便之后的调用,下面直接放出代码:
def conv2d(self,x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(self,x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def weight_variable(self,shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(self,shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
如上面的代码,卷积的横竖方向步进都为 1,与池化层的 padding 都设置为
建立 CNN 卷积深度学习网络模型
先建立第一层卷积,代码如下
# first layer
w_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(tf.nn.bias_add(conv2d(x_images, w_conv1), b_conv1))
h_pool1 = max_pool_2x2(h_conv1)
h_dropout1 = tf.nn.dropout(h_pool1,keep_prob)
conv_width = math.ceil(width/2)
conv_height = math.ceil(height/2)
使用了 5x5 的卷积核,从验证码里提取出 32 种特征。使用了
接下来的两层卷积都是类似的,权重系数的形状略有不同,注意调整下
#second layer
w_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(tf.nn.bias_add(conv2d(h_dropout1, w_conv2), b_conv2))
h_pool2 = max_pool_2x2(h_conv2)
h_dropout2 = tf.nn.dropout(h_pool2,keep_prob)
conv_width = math.ceil(conv_width/2)
conv_height = math.ceil(conv_height/2)
#third layer
w_conv3 = weight_variable([5, 5, 64, 64])
b_conv3 = bias_variable([64])
h_conv3 = tf.nn.relu(tf.nn.bias_add(conv2d(h_dropout2, w_conv3), b_conv3))
h_pool3 = max_pool_2x2(h_conv3)
h_dropout3 = tf.nn.dropout(h_pool3,keep_prob)
conv_width = math.ceil(conv_width/2)
conv_height = math.ceil(conv_height/2)
紧接着就是全连接层了,建立方法和前面几节是类似的。都是将卷积后的所有特征压平为一维向量,再逐步降维到类别数目。我们建立两层全连接层,第一层降维到 1024,第二层再从 1024 降维到
#first fully layer
conv_width = int(conv_width)
conv_height = int(conv_height)
w_fc1 = weight_variable([64*conv_width*conv_height,1024])
b_fc1 = bias_variable([1024])
h_dropout3_flat = tf.reshape(h_dropout3,[-1,64*conv_width*conv_height])
h_fc1 = tf.nn.relu(tf.nn.bias_add(tf.matmul(h_dropout3_flat, w_fc1), b_fc1))
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#second fully layer
w_fc2 = weight_variable([1024,char_num*classes])
b_fc2 = bias_variable([char_num*classes])
y_conv = tf.add(tf.matmul(h_fc1_drop, w_fc2), b_fc2)
接下来,为了方便之后的调用,我们将以上代码封装为一个类,全部代码组合如下(
# -*- coding: utf-8 -*
import tensorflow as tf
import math
class captchaModel():
def __init__(self,
width = 160,
height = 60,
char_num = 4,
classes = 62):
self.width = width
self.height = height
self.char_num = char_num
self.classes = classes
def conv2d(self,x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(self,x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
def weight_variable(self,shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(self,shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def create_model(self,x_images,keep_prob):
#first layer
w_conv1 = self.weight_variable([5, 5, 1, 32])
b_conv1 = self.bias_variable([32])
h_conv1 = tf.nn.relu(tf.nn.bias_add(self.conv2d(x_images, w_conv1), b_conv1))
h_pool1 = self.max_pool_2x2(h_conv1)
h_dropout1 = tf.nn.dropout(h_pool1,keep_prob)
conv_width = math.ceil(self.width/2)
conv_height = math.ceil(self.height/2)
#second layer
w_conv2 = self.weight_variable([5, 5, 32, 64])
b_conv2 = self.bias_variable([64])
h_conv2 = tf.nn.relu(tf.nn.bias_add(self.conv2d(h_dropout1, w_conv2), b_conv2))
h_pool2 = self.max_pool_2x2(h_conv2)
h_dropout2 = tf.nn.dropout(h_pool2,keep_prob)
conv_width = math.ceil(conv_width/2)
conv_height = math.ceil(conv_height/2)
#third layer
w_conv3 = self.weight_variable([5, 5, 64, 64])
b_conv3 = self.bias_variable([64])
h_conv3 = tf.nn.relu(tf.nn.bias_add(self.conv2d(h_dropout2, w_conv3), b_conv3))
h_pool3 = self.max_pool_2x2(h_conv3)
h_dropout3 = tf.nn.dropout(h_pool3,keep_prob)
conv_width = math.ceil(conv_width/2)
conv_height = math.ceil(conv_height/2)
#first fully layer
conv_width = int(conv_width)
conv_height = int(conv_height)
w_fc1 = self.weight_variable([64*conv_width*conv_height,1024])
b_fc1 = self.bias_variable([1024])
h_dropout3_flat = tf.reshape(h_dropout3,[-1,64*conv_width*conv_height])
h_fc1 = tf.nn.relu(tf.nn.bias_add(tf.matmul(h_dropout3_flat, w_fc1), b_fc1))
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
#second fully layer
w_fc2 = self.weight_variable([1024,self.char_num*self.classes])
b_fc2 = self.bias_variable([self.char_num*self.classes])
y_conv = tf.add(tf.matmul(h_fc1_drop, w_fc2), b_fc2)
return y_conv
训练CNN卷积深度学习网络
这一部分,主要是将
我们先用上一节建立的类具体化一个实例,然后获取需要用到的参数。
captcha = generate_captcha.generateCaptcha()
width,height,char_num,characters,classes = captcha.get_parameter()
建立 placeholder
x = tf.placeholder(tf.float32, [None, height,width,1])
y_ = tf.placeholder(tf.float32, [None, char_num*classes])
keep_prob = tf.placeholder(tf.float32)
然后就可以用上一部分建立的
model = captcha_model.captchaModel(width,height,char_num,classes)
y_conv = model.create_model(x,keep_prob)
训练网络需要先定义 loss 函数,这里使用了
cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_,logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
predict = tf.reshape(y_conv, [-1,char_num, classes])
real = tf.reshape(y_,[-1,char_num, classes])
correct_prediction = tf.equal(tf.argmax(predict,2), tf.argmax(real,2))
correct_prediction = tf.cast(correct_prediction, tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
接下来就是训练了,这里也定义了一个 saver,可以在训练网络完成后,将结果保存下来(可参考
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
step = 1
while True:
batch_x,batch_y = next(captcha.gen_captcha(64))
_,loss = sess.run([train_step,cross_entropy],feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.75})
print ('step:%d,loss:%f' % (step,loss))
if step % 100 == 0:
batch_x_test,batch_y_test = next(captcha.gen_captcha(100))
acc = sess.run(accuracy, feed_dict={x: batch_x_test, y_: batch_y_test, keep_prob: 1.})
print ('###############################################step:%d,accuracy:%f' % (step,acc))
if acc > 0.99:
saver.save(sess,"./capcha_model.ckpt")
break
step += 1
全部代码整合如下,文件名取为
#-*- coding:utf-8 -*-
import tensorflow as tf
import numpy as np
import string
import generate_captcha
import captcha_model
if __name__ == '__main__':
captcha = generate_captcha.generateCaptcha()
width,height,char_num,characters,classes = captcha.get_parameter()
x = tf.placeholder(tf.float32, [None, height,width,1])
y_ = tf.placeholder(tf.float32, [None, char_num*classes])
keep_prob = tf.placeholder(tf.float32)
model = captcha_model.captchaModel(width,height,char_num,classes)
y_conv = model.create_model(x,keep_prob)
cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_,logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
predict = tf.reshape(y_conv, [-1,char_num, classes])
real = tf.reshape(y_,[-1,char_num, classes])
correct_prediction = tf.equal(tf.argmax(predict,2), tf.argmax(real,2))
correct_prediction = tf.cast(correct_prediction, tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
step = 1
while True:
batch_x,batch_y = next(captcha.gen_captcha(64))
_,loss = sess.run([train_step,cross_entropy],feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.75})
print ('step:%d,loss:%f' % (step,loss))
if step % 100 == 0:
batch_x_test,batch_y_test = next(captcha.gen_captcha(100))
acc = sess.run(accuracy, feed_dict={x: batch_x_test, y_: batch_y_test, keep_prob: 1.})
print ('###############################################step:%d,accuracy:%f' % (step,acc))
if acc > 0.99:
saver.save(sess,"./capcha_model.ckpt")
break
step += 1
然后就可以训练了。
$ python train.py
...
step:17760, loss:0.083060
step:17761, loss:0.082030
step:17762, loss:0.083040
step:17763, loss:0.083019
step:17764, loss:0.082072
step:17765, loss:0.081089
...
这个训练周期就比较长了,采用 GPU 需要 4-5 个小时左右,CPU 大概需要 20 个小时左右。下一节,我们放出训练完成后的网络模型,并且测试一下,看看到底建立的 CNN 卷积深度学习网络究竟能否识别和破解验证码。