使用机器学习识别简单的验证码（二）

112次阅读

共计 2295 个字符，预计需要花费 6 分钟才能阅读完成。

原图片：
（图片：使用机器学习识别简单的验证码（二））
测试结果：

结果正确！

# encoding=utf-8
"""
Date:2019-08-10 13:43
User:LiYu
Email:liyu_5498@163.com

"""
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib
from 处理切分验证码图片 import *


def img2data(num):
    """Load all sample images of one class and convert them to binary vectors.

    Every file name reported by ``getAllFileLabels`` for the directory
    ``cut_number/<num>`` is opened as ``cut_number/<num>/<label>.png``,
    converted to grayscale, thresholded (pixel value > 250 becomes 1,
    everything else becomes 0) and flattened into a 1-D array.

    Args:
        num: Class label of the samples; also the name of the sub-directory
            under ``cut_number/`` that is read.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a list of flattened 0/1 arrays (one
        per image) and ``Y`` is a list containing ``num`` once per image.
    """
    X = []
    Y = []
    labels = getAllFileLabels(''.join(['cut_number/', str(num)]))
    for label in labels:
        img_path = ''.join(['cut_number/', str(num), '/', label, '.png'])
        # Image.open keeps the underlying file open; the context manager
        # releases it as soon as the pixel data has been copied out.
        with Image.open(img_path) as img:
            # convert('L') yields the grayscale version of the image.
            img_gray = np.array(img.convert('L'))
        # Vectorized threshold: > 250 -> 1, otherwise 0 (dtype is preserved).
        binary = (img_gray > 250).astype(img_gray.dtype)
        X.append(binary.reshape(-1))
        Y.append(num)
    return X, Y


def getAllFileLabels(dir):
    """Return the entry names of a directory with a trailing ``.png`` removed.

    ``str.rstrip('.png')`` must not be used here: it strips any trailing run
    of the characters ``.``, ``p``, ``n`` and ``g`` rather than the suffix,
    so a name such as ``ping.png`` would be cut down to ``pi``.

    Args:
        dir: Path of the directory to list.

    Returns:
        A list with one string per directory entry, in ``os.listdir`` order.
        Names ending in ``.png`` lose exactly that suffix; other names are
        returned unchanged.
    """
    suffix = '.png'
    labels = []
    for imgFile in os.listdir(dir):
        if imgFile.endswith(suffix):
            imgFile = imgFile[:-len(suffix)]
        labels.append(imgFile)
    return labels


def loadData():
    """Collect the samples of classes 0 through 9 into two NumPy arrays.

    Calls ``img2data`` for every class index in ``range(10)``, concatenates
    the returned feature vectors and labels, prints both arrays and returns
    them.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays built from the collected feature
        vectors and their labels.
    """
    features = []
    targets = []
    for digit in range(10):
        digit_features, digit_targets = img2data(digit)
        features.extend(digit_features)
        targets.extend(digit_targets)
    X = np.array(features)
    Y = np.array(targets)
    print("X: ", X)
    print("Y: ", Y)
    return X, Y


def generatrModel(X, Y):
    """Fit a one-vs-rest logistic regression on ``X``/``Y`` and save it.

    The fitted model is written to ``captcha_model/captcha_model.model``;
    the target directory is created first if it does not exist yet, so the
    dump no longer fails on a fresh checkout.

    Args:
        X: Training feature matrix passed to ``LogisticRegression.fit``.
        Y: Training labels passed to ``LogisticRegression.fit``.
    """
    logReg = LogisticRegression(multi_class='ovr', solver='sag')
    logReg.fit(X, Y)
    # NOTE: this is the score on the training data itself; no hold-out split
    # is used, so it says nothing about generalization.
    print(logReg.score(X, Y))
    model_path = 'captcha_model/captcha_model.model'
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(logReg, model_path)

def getModel():
    """Load and return the model stored at ``captcha_model/captcha_model.model``."""
    # NOTE(review): joblib files are pickle-based, so only load model files
    # that come from a trusted source.
    return joblib.load('captcha_model/captcha_model.model')


def captcha_predict(path):
    """Predict the five characters of one captcha in ``captcha_images_test/``.

    The image is first passed through ``binarization`` and ``noiseReduction``
    (both defined outside this block; presumably they preprocess the image
    file, to be confirmed against their definitions). It is then cut into
    five 100x110 tiles, which are saved as ``captcha_images_test/0.png`` to
    ``captcha_images_test/4.png``. Each tile is reloaded, converted to
    grayscale, thresholded (pixel value > 250 becomes 1, everything else 0),
    flattened and classified with the model returned by ``getModel``.

    Args:
        path: File name of the captcha image inside ``captcha_images_test/``.

    Returns:
        A string made of the five predicted labels in left-to-right order.
    """
    img_gray = binarization('captcha_images_test/' + path)
    noiseReduction(img_gray, path)
    labels = ['0', '1', '2', '3', '4']
    # The context manager closes the image file once all tiles are written.
    with Image.open('captcha_images_test/' + path) as img:
        for i, label in enumerate(labels):
            left = 100 * (1 + i)
            pic = img.crop((left, 170, left + 100, 280))
            # Kept from the original: draws the tile on the current plot.
            plt.imshow(pic)
            pic.save(''.join(['captcha_images_test/', label, '.png']))

    model = getModel()
    predicted = []
    for label in labels:
        # Separate name so the ``path`` argument is not overwritten.
        tile_path = ''.join(['captcha_images_test/', label, '.png'])
        with Image.open(tile_path) as tile:
            pixels = np.array(tile.convert('L'))
        # Vectorized threshold: > 250 -> 1, otherwise 0 (dtype is preserved).
        features = (pixels > 250).astype(pixels.dtype).reshape(-1)
        Y_pre = model.predict([features])
        predicted.append(str(Y_pre[0]))
    return ''.join(predicted)


if __name__ == '__main__':
    # Training step (uncomment to rebuild and save the model):
    # X, Y = loadData()
    # generatrModel(X, Y)

    # Prediction: print the recognized text of each sample captcha.
    for captcha_file in ('unknow2.png', 'unknow3.png', 'unknow4.png'):
        print(captcha_predict(captcha_file))

正文完

发表至：教程

2019-11-26