cnn进行端到端的验证码识别改进
2024-10-16 04:42:00
keras_cnn.py 训练及建模
#!/usr/bin/env python
# coding=utf- """
利用keras cnn进行端到端的验证码识别, 简单直接暴力。
迭代100次可以达到95%的准确率,但是很容易过拟合,泛化能力糟糕, 除了增加训练数据还没想到更好的方法. __autho__: jkmiao
__email__: miao1202@.com
___date__:-- """
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten, Input, merge
from keras.layers import Convolution2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import os, random
import numpy as np
from keras.models import model_from_json
from util import CharacterTable
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
# from keras.utils.visualize_util import plot def load_data(path='img/clearNoise/'):
fnames = [os.path.join(path, fname) for fname in os.listdir(path) if fname.endswith('jpg')]
random.shuffle(fnames)
data, label = [], []
for i, fname in enumerate(fnames):
imgLabel = fname.split('/')[-].split('_')[]
if len(imgLabel)!=:
print 'error: ', fname
continue
imgM = np.array(Image.open(fname).convert('L'))
imgM = * (imgM>)
data.append(imgM.reshape((, , )))
label.append(imgLabel.lower())
return np.array(data), label ctable = CharacterTable()
data, label = load_data()
print data[].max(), data[].min()
label_onehot = np.zeros((len(label), ))
for i, lb in enumerate(label):
label_onehot[i,:] = ctable.encode(lb)
print data.shape, data[-].max(), data[-].min()
print label_onehot.shape datagen = ImageDataGenerator(shear_range=0.08, zoom_range=0.08, horizontal_flip=False,
rotation_range=, width_shift_range=0.06, height_shift_range=0.06) datagen.fit(data) x_train, x_test, y_train, y_test = train_test_split(data, label_onehot, test_size=0.1) DEBUG = False # 建模
if DEBUG:
input_img = Input(shape=(, , )) inner = Convolution2D(, , , border_mode='same', activation='relu')(input_img)
inner = MaxPooling2D(pool_size=(,))(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
inner = MaxPooling2D(pool_size=(,))(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
encoder_a = Flatten()(inner) inner = Convolution2D(, , , border_mode='same', activation='relu')(input_img)
inner = MaxPooling2D(pool_size=(,))(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
inner = MaxPooling2D(pool_size=(,))(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
encoder_b = Flatten()(inner) inner = Convolution2D(, , , border_mode='same', activation='relu')(input_img)
inner = MaxPooling2D(pool_size=(,))(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
inner = MaxPooling2D(pool_size=(,))(inner)
inner = Convolution2D(, , , border_mode='same')(inner)
encoder_c = Flatten()(inner) input = merge([encoder_a, encoder_b, encoder_c], mode='concat', concat_axis=-)
drop = Dropout(0.5)(input)
flatten = Dense()(drop)
flatten = Dropout(0.5)(flatten) fc1 = Dense(, activation='softmax')(flatten)
fc2 = Dense(, activation='softmax')(flatten)
fc3 = Dense(, activation='softmax')(flatten)
fc4 = Dense(, activation='softmax')(flatten)
fc5 = Dense(, activation='softmax')(flatten)
fc6 = Dense(, activation='softmax')(flatten)
merged = merge([fc1, fc2, fc3, fc4, fc5, fc6], mode='concat', concat_axis=-) model = Model(input=input_img, output=merged)
else:
model = model_from_json(open('model/ba_cnn_model3.json').read())
model.load_weights('model/ba_cnn_model3.h5') # 编译
# model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) # plot(model, to_file='model3.png', show_shapes=True) # 训练 early_stopping = EarlyStopping(monitor='val_loss', patience=) model.fit_generator(datagen.flow(x_train, y_train, batch_size=), samples_per_epoch=len(x_train), nb_epoch=, validation_data=(x_test, y_test), callbacks=[early_stopping] ) json_string = model.to_json()
with open('./model/ba_cnn_model4.json', 'w') as fw:
fw.write(json_string)
model.save_weights('./model/ba_cnn_model4.h5') print 'done saved model cnn3' # 测试
y_pred = model.predict(x_test, verbose=)
cnt =
for i in range(len(y_pred)):
guess = ctable.decode(y_pred[i])
correct = ctable.decode(y_test[i])
if guess == correct:
cnt +=
if i%==:
print '--'*, i
print 'y_pred', guess
print 'y_test', correct
print cnt/float(len(y_pred))
apicode.py 模型使用
#!/usr/bin/env python
# coding=utf- from util import CharacterTable
from keras.models import model_from_json
from PIL import Image
import matplotlib.pyplot as plt
import os
import numpy as np
from prepare import clearNoise def img2vec(fname):
data = []
img = clearNoise(fname).convert('L')
imgM = 1.0 * (np.array(img)>)
print imgM.max(), imgM.min()
data.append(imgM.reshape((, , )))
return np.array(data), imgM ctable = CharacterTable() model = model_from_json(open('model/ba_cnn_model4.json').read())
model.load_weights('model/ba_cnn_model4.h5') def test(path):
fnames = [ os.path.join(path, fname) for fname in os.listdir(path) ][:]
correct =
for idx, fname in enumerate(fnames, ):
data, imgM = img2vec(fname)
y_pred = model.predict(data)
result = ctable.decode(y_pred[])
label = fname.split('/')[-].split('_')[]
if result == label:
correct +=
print 'correct', fname
else:
print result, label
print 'accuracy: ',idx, float(correct)/idx
print '=='*
# plt.subplot()
# plt.imshow(Image.open(fname).convert('L'), plt.cm.gray)
# plt.title(fname)
#
# plt.subplot()
# plt.imshow(imgM, plt.cm.gray)
# plt.title(result)
# plt.show() test('test')
最新文章
- Markdown编辑器语法指南2
- JS实现打印功能
- 更新日志 - BugHD 与你的应用一起成长
- 储物柜soket通信协议和中间件实现技术细节
- Yii2提示信息设置方法
- C# 调用导致堆栈不对称。原因可能是托管的 PInvoke 签名与非托管的目标签名不匹配
- [swustoj 1091] 土豪我们做朋友吧
- 1.2、Mybatis二级缓存测试
- bzoj1303
- 字符串的encode与decode解决乱码问题
- 关于User&;nbsp;Defined&;nbsp;Runtime&;nbsp;Attributes的小技巧
- Xcode6中怎么添加空工程模板
- c#中 uint--byte[]--char[]--string相互转换汇总
- 快速构建Windows 8风格应用36-商店应用发布流程
- JavaScript两个变量交换值(不使用临时变量)
- Wincc flexable的IO域组态
- 立即掌握SSM框架的要诀
- HNOI2008玩具装箱
- 运用SqlSugar框架+Axios写的增删查案例
- FPGA——入手(零)