本教程介绍如何使用 Keras(TensorFlow)搭建一个用于识别图像验证码的深度学习模型,采用 CNN 提取图像特征,LSTM 建模字符序列,最后输出预测结果。
1. 安装依赖
pip install tensorflow pillow numpy captcha
2. 生成验证码数据集
from captcha.image import ImageCaptcha
import string, random, os
from PIL import Image
# Alphabet the captcha text is drawn from: the ten digits followed by A-Z.
characters = ''.join((string.digits, string.ascii_uppercase))
# Number of characters in every captcha.
captcha_length = 4
# Image size handed to ImageCaptcha and used as the model's input size.
img_height = 60
img_width = 160
def generate_captcha(output_dir='keras_captcha', num_samples=5000):
    """Render random captcha images and save them as PNG files.

    Each file is named ``<text>_<index>.png`` so that the ground-truth text
    can be recovered from the file name alone.

    Args:
        output_dir: Directory to write into; created if it does not exist.
        num_samples: Number of images to generate.
    """
    os.makedirs(output_dir, exist_ok=True)
    renderer = ImageCaptcha(width=img_width, height=img_height)
    for i in range(num_samples):
        picked = random.choices(characters, k=captcha_length)
        text = ''.join(picked)
        renderer.generate_image(text).save(f"{output_dir}/{text}_{i}.png")
# Generate the dataset with the default arguments of generate_captcha.
generate_captcha()
3. 构建数据加载器
import tensorflow as tf
import numpy as np
# Lookup tables between captcha characters and their integer class indices.
idx_to_char = dict(enumerate(characters))
char_to_idx = {ch: idx for idx, ch in idx_to_char.items()}
def parse_image(filename):
    """Load one captcha PNG and derive its label from the file name.

    Must run eagerly (for example wrapped in ``tf.py_function``), because the
    label is built with ordinary Python string handling and the
    ``char_to_idx`` dict.

    Args:
        filename: Path of a file named ``<TEXT>_<index>.png``, given as a
            scalar string tensor, ``bytes`` or ``str``.

    Returns:
        ``(image, label)`` where ``image`` is a float32 tensor resized to
        ``(img_height, img_width, 3)`` and ``label`` is an int32 tensor with
        one class index per character of ``<TEXT>``.

    Raises:
        KeyError: If ``<TEXT>`` contains a character missing from
            ``char_to_idx``.
    """
    path = filename.numpy() if hasattr(filename, 'numpy') else filename
    if isinstance(path, bytes):
        path = path.decode('utf-8')

    # basename() handles the platform's separators, unlike splitting on
    # os.sep, which breaks when the glob returns '/'-separated paths on
    # Windows.
    label_str = os.path.basename(path).split('_')[0]
    label = tf.constant([char_to_idx[c] for c in label_str], dtype=tf.int32)

    image = tf.io.read_file(path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [img_height, img_width])
    return image, label
def create_dataset(data_dir, batch_size=64):
    """Build a shuffled, batched ``tf.data`` pipeline over ``data_dir/*.png``.

    Args:
        data_dir: Directory that contains the captcha PNG files.
        batch_size: Number of samples per batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)``. ``images`` has
        shape ``(batch, img_height, img_width, 3)``. ``labels`` is a tuple of
        ``captcha_length`` int32 tensors of shape ``(batch,)``, one per
        character position, matching a model with one output per character.
    """
    files = tf.data.Dataset.list_files(data_dir + '/*.png')

    def _load(path):
        image, label = tf.py_function(
            parse_image, [path], [tf.float32, tf.int32])
        # py_function drops static shape information; restore it so Keras
        # can validate the data against the model's input shape.
        image.set_shape([img_height, img_width, 3])
        label = tf.reshape(label, [captcha_length])
        # Split into one scalar target per character position.
        return image, tuple(tf.unstack(label))

    dataset = files.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
4. 构建模型
from tensorflow.keras import layers, models
def build_model():
    """Build the CNN + bidirectional-LSTM captcha recogniser.

    Three Conv2D/MaxPooling2D stages extract image features, two
    bidirectional LSTM layers summarise them as a sequence, and one softmax
    head per character position produces the class probabilities.

    Returns:
        An uncompiled ``keras.Model`` taking ``(img_height, img_width, 3)``
        images and returning a list of ``captcha_length`` outputs named
        ``char_0`` ... ``char_{captcha_length - 1}``, each of size
        ``len(characters)``.
    """
    inputs = layers.Input(shape=(img_height, img_width, 3))

    x = inputs
    for filters in (32, 64, 128):
        x = layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
        x = layers.MaxPooling2D()(x)

    # Feature maps are (height, width, channels). Move width to the front so
    # the LSTM steps left-to-right across the captcha; without the permute
    # the time axis would be the image height.
    x = layers.Permute((2, 1, 3))(x)
    x = layers.Reshape((x.shape[1], x.shape[2] * x.shape[3]))(x)

    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(128))(x)

    outputs = [
        layers.Dense(len(characters), activation='softmax', name=f'char_{i}')(x)
        for i in range(captcha_length)
    ]
    return models.Model(inputs=inputs, outputs=outputs)
# Build the network and configure training. The single loss string is
# applied to every output head.
model = build_model()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    # One metric list per output head: a flat ['accuracy'] is rejected by
    # Keras 3 for multi-output models, while this form works on Keras 2 and 3.
    metrics=[['accuracy'] for _ in range(captcha_length)],
)
5. 训练模型
train_ds = create_dataset('keras_captcha', batch_size=64)
# No steps_per_epoch: the dataset is finite and not repeated, so each epoch
# is one full pass. Requesting more steps than there are batches would
# exhaust the input during the first epoch and interrupt training.
model.fit(train_ds, epochs=10)
6. 推理函数
import numpy as np
def predict_image(model, path):
    """Predict the text shown in a single captcha image.

    Args:
        model: Model whose ``predict`` returns one probability array per
            character position.
        path: Path of the PNG file to read.

    Returns:
        The predicted characters joined into one string.
    """
    raw = tf.io.read_file(path)
    pixels = tf.image.convert_image_dtype(
        tf.image.decode_png(raw, channels=3), tf.float32)
    pixels = tf.image.resize(pixels, [img_height, img_width])
    batch = tf.expand_dims(pixels, axis=0)  # add the batch dimension

    decoded = []
    for head_probs in model.predict(batch):
        decoded.append(idx_to_char[np.argmax(head_probs)])
    return ''.join(decoded)
# File names contain random text, so a hard-coded name is very unlikely to
# exist; run the demo on the first image actually present instead.
import glob

sample_paths = sorted(glob.glob('keras_captcha/*.png'))
if sample_paths:
    print(predict_image(model, sample_paths[0]))
else:
    print('No captcha images found in keras_captcha/')