数据处理,建模及测试
测试效果展示
原图片:
测试结果:
结果正确!
数据处理,建模及测试代码实现
# encoding=utf-8
"""Train and apply a logistic-regression model that reads digit captchas.

Date:2019-08-10 13:43
User:LiYu
Email:[email protected]

Training images are read from ``cut_number/<digit>/*.png``; the fitted model
is persisted to ``captcha_model/captcha_model.model``; captchas to recognise
are read from ``captcha_images_test/``.
"""
import os

import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

try:
    import joblib
except ImportError:
    # Old scikit-learn releases ship joblib only as a vendored sub-package.
    from sklearn.externals import joblib

# Provides binarization() and noiseReduction(), which captcha_predict() calls.
from 处理切分验证码图片 import *

TRAIN_DIR = 'cut_number/'
TEST_DIR = 'captcha_images_test/'
MODEL_PATH = 'captcha_model/captcha_model.model'
IMAGE_SUFFIX = '.png'

# Grey levels at or below this value become 0, brighter ones become 1.
WHITE_THRESHOLD = 250

# Geometry of the five character cells inside a captcha image (pixels).
CAPTCHA_LENGTH = 5
CELL_WIDTH = 100
CELL_TOP = 170
CELL_BOTTOM = 280


def _image_to_features(image_path):
    """Load an image and return its binarised pixels as a flat 1-D array.

    The image is converted to 8-bit greyscale, every pixel with a value
    ``<= WHITE_THRESHOLD`` is mapped to 0 and every brighter pixel to 1, and
    the 2-D result is flattened row by row.
    """
    # Pillow opens files lazily; convert() forces the pixel data to be read
    # while the file is still open, and the context manager closes it.
    with Image.open(image_path) as img:
        gray = np.array(img.convert('L'))
    return (gray > WHITE_THRESHOLD).astype(gray.dtype).reshape(-1)


def img2data(num):
    """Build the training samples for one digit class.

    Args:
        num: The digit whose folder ``cut_number/<num>/`` is read; it is also
            used as the target value for every sample found there.

    Returns:
        A pair ``(X, Y)`` where ``X`` is a list of flat binarised pixel
        arrays and ``Y`` is a list holding ``num`` once per sample.
    """
    folder = ''.join([TRAIN_DIR, str(num)])
    X = []
    Y = []
    for label in getAllFileLabels(folder):
        X.append(_image_to_features(''.join([folder, '/', label, IMAGE_SUFFIX])))
        Y.append(num)
    return X, Y


def getAllFileLabels(dir):
    """Return the names of all PNG files in ``dir`` without the extension.

    Only entries ending in ``.png`` are considered, and exactly that suffix
    is removed (``str.rstrip('.png')`` would strip any trailing run of the
    characters ``.``, ``p``, ``n`` and ``g`` instead). The result is sorted
    so that the order does not depend on the file system.
    """
    return sorted(
        name[:-len(IMAGE_SUFFIX)]
        for name in os.listdir(dir)
        if name.endswith(IMAGE_SUFFIX)
    )


def loadData():
    """Collect the training samples of all ten digit classes.

    Returns:
        A pair ``(X, Y)`` of NumPy arrays built from the concatenated
        results of ``img2data(0)`` through ``img2data(9)``. Both arrays are
        also printed.
    """
    X = []
    Y = []
    for digit in range(10):
        Xi, Yi = img2data(digit)
        X.extend(Xi)
        Y.extend(Yi)
    X = np.array(X)
    Y = np.array(Y)
    print("X: ", X)
    print("Y: ", Y)
    return X, Y


def generatrModel(X, Y):
    """Fit a one-vs-rest logistic regression on ``(X, Y)`` and save it.

    The score printed is computed on the training data itself, so it is not
    an estimate of accuracy on unseen captchas.
    """
    # NOTE(review): recent scikit-learn releases deprecate ``multi_class``;
    # confirm against the installed version before upgrading.
    logReg = LogisticRegression(multi_class='ovr', solver='sag')
    logReg.fit(X, Y)
    print(logReg.score(X, Y))
    # joblib.dump() does not create missing parent directories.
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    joblib.dump(logReg, MODEL_PATH)


def getModel():
    """Load and return the model previously saved by ``generatrModel``."""
    # joblib files are pickles: only load a model file you trust.
    return joblib.load(MODEL_PATH)


def captcha_predict(path):
    """Recognise the five characters of one captcha image.

    Args:
        path: File name of the captcha inside ``captcha_images_test/``.

    Returns:
        The five predicted labels concatenated into one string.

    Side effects:
        Writes the five character crops to ``captcha_images_test/0.png``
        through ``captcha_images_test/4.png``, overwriting any files already
        stored under those names.
    """
    source_path = TEST_DIR + path
    # Both helpers come from the star import above; presumably noiseReduction
    # writes the cleaned image back to disk before it is reopened below --
    # TODO confirm against that module.
    img_gray = binarization(source_path)
    noiseReduction(img_gray, path)

    crop_paths = [
        ''.join([TEST_DIR, str(index), IMAGE_SUFFIX])
        for index in range(CAPTCHA_LENGTH)
    ]
    with Image.open(source_path) as img:
        for index, crop_path in enumerate(crop_paths):
            left = CELL_WIDTH * (1 + index)
            cell = img.crop((left, CELL_TOP, left + CELL_WIDTH, CELL_BOTTOM))
            cell.save(crop_path)

    model = getModel()
    predicted = []
    for crop_path in crop_paths:
        Y_pre = model.predict([_image_to_features(crop_path)])
        predicted.append(str(Y_pre[0]))
    return ''.join(predicted)


if __name__ == '__main__':
    # Model-building step (uncomment to retrain and persist the model):
    # X, Y = loadData()
    # generatrModel(X, Y)

    # Prediction test
    imgs = ['unknow2.png', 'unknow3.png', 'unknow4.png']
    for img_name in imgs:
        result = captcha_predict(img_name)
        print(result)