Opencv-python实现的手写数字识别程序
·
这次实现的手写数字识别程序基于KNN(K近邻)分类算法。
数据集共有5000个手写数字,0~9各500个。我们把每个数字的一半样本(图片每一行的前50个,合计每个数字250个)作为训练集,另一半(每一行的后50个)作为测试集,用KNN对测试集进行识别,得到最终的准确率
整个程序的训练数据都来自OpenCV的自带的一张图片digits.png(在文件夹opencv/samples/data/中),这张图片里面就有5000个手写数字,每个数字都是20x20的图像,没有OpenCV的可以用我给出的这张图片👇
当然,如果有想下载完整版OpenCV的朋友,觉得下载速度很慢的可以参考我之前的博文:关于加快OpenCV下载速度的解决方法
先附上官网代码:
import numpy as np
import cv2 as cv
# Load the 5000-digit sample sheet. cv.imread returns None instead of raising
# when the file cannot be read, so fail early with a clear message.
img = cv.imread('digits.png')
if img is None:
    raise FileNotFoundError("could not read 'digits.png' from the working directory")
gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
# Split the sheet into 5000 cells of 20x20 pixels (50 rows x 100 columns).
cells = [np.hsplit(row,100) for row in np.vsplit(gray,50)]
# Turn it into a NumPy array of shape (50,100,20,20).
x = np.array(cells)
# Left half of every row is the training set, right half the test set;
# each 20x20 cell is flattened into a 400-element float32 row.
train = x[:,:50].reshape(-1,400).astype(np.float32) # Size = (2500,400)
test = x[:,50:100].reshape(-1,400).astype(np.float32) # Size = (2500,400)
# Labels: 250 consecutive samples per digit 0..9, as a column vector.
k = np.arange(10)
train_labels = np.repeat(k,250)[:,np.newaxis]
test_labels = train_labels.copy()
# Create the kNN model, train it, then classify the test set using the
# 5 nearest neighbours of each sample.
knn = cv.ml.KNearest_create()
knn.train(train, cv.ml.ROW_SAMPLE, train_labels)
ret,result,neighbours,dist = knn.findNearest(test,k=5)
# Compare the predictions with test_labels and report the percentage that match.
matches = result==test_labels
correct = np.count_nonzero(matches)
accuracy = correct*100.0/result.size
print( accuracy )
经过一次训练之后,我们就可以将训练数据保存,保存之后下次我们使用时便可以直接读取我们保存文件的数据了。将以下代码加入到上面程序中就可以实现保存。
# Persist the training samples and their labels in a single .npz archive.
np.savez('knn_data.npz', train=train, train_labels=train_labels)

# Read the archive back; the context manager closes the file afterwards.
with np.load('knn_data.npz') as data:
    print(data.files)
    train, train_labels = data['train'], data['train_labels']
上面保存的文件大约需要4MB的空间。为了节省空间,我们可以先把数据转换为np.uint8再保存(灰度像素值和标签本来就在0~255之间,不会丢失信息),在使用时再转回float32。注意下面这个版本同时保存了测试集test,后面的修改版程序需要用到它:
# Narrow all three arrays to uint8 before saving to shrink the archive;
# they are converted back to float32 when loaded for use.
test, train, train_labels = (
    arr.astype(np.uint8) for arr in (test, train, train_labels)
)
np.savez('knn_data.npz', train=train, train_labels=train_labels, test=test)
以上代码可以实现对数字的训练,并且测试数字识别的准确率,但是如果我想试试能不能认出我妖娆字体呢?于是我就修改了下程序。
修改版:
1、所需库
- numpy
- opencv-python
2、实现目标
用鼠标在画布上手写数字,利用程序进行识别,如果识别出现错误,可以将正确的数字添加进入训练集
3、运行效果
可以看出来。。。他喵的人工智障😀
当然,你可以通过不断的向里面添加数字,不断的提升他的识别率(那得添加很多很多)。也可以多找些训练集进行训练。
4、实现代码
# coding=utf-8
import numpy as np
import cv2 as cv
drawing = False # True while the left mouse button is held down
# Sample archive (.npz): loaded at startup and rewritten whenever a new
# sample is appended. NOTE(review): absolute, machine-specific path — adjust
# it to wherever knn_data.npz was actually saved.
filepath = 'C:/Users/11037/Desktop/knn_data.npz'
def nothing(x):
    """No-op callback handed to the trackbars; the argument is ignored."""
    return None
def draw(event,x,y,flags,param):
    """Mouse callback: paint white 20x20 squares while the left button is held.

    Pressing the left button starts a stroke, moving the mouse with the
    button held paints a square whose top-left corner is at the cursor, and
    releasing the button ends the stroke. Paints directly into the
    module-level canvas ``img``.

    Args:
        event: OpenCV mouse event code (``cv.EVENT_*``).
        x: Cursor column in canvas coordinates.
        y: Cursor row in canvas coordinates.
        flags: Unused.
        param: Unused.
    """
    global drawing
    if event == cv.EVENT_LBUTTONDOWN:
        drawing = True
    elif event == cv.EVENT_MOUSEMOVE:
        if drawing:
            # Clamp at zero: a negative coordinate (cursor dragged past the
            # top/left edge) would otherwise be read as a negative index and
            # paint on the opposite side of the canvas. Coordinates past the
            # bottom/right edge are already clipped by slicing.
            y0, y1 = max(y, 0), max(y + 20, 0)
            x0, x1 = max(x, 0), max(x + 20, 0)
            img[y0:y1, x0:x1] = (255, 255, 255)
    elif event == cv.EVENT_LBUTTONUP:
        drawing = False
# Black 300x300 BGR canvas to draw on, and the window that displays it.
img = np.zeros((300,300,3), np.uint8)
cv.namedWindow('image')
# Names of the trackbars; the 0/1 trackbars are used as push buttons.
accuracy = 'accuracy'
clear = 'clear'
distinguish = 'distinguish'
append = 'append'
right = 'right'
# Load the stored samples and convert them to float32 for the kNN model.
# The arrays are read inside the `with` block so the archive's file handle
# is closed as soon as they are in memory.
with np.load(filepath) as data:
    train = data['train'].astype(np.float32)
    train_labels = data['train_labels'].astype(np.float32)
    test = data['test'].astype(np.float32)
test_labels = train_labels.copy()
knn = cv.ml.KNearest_create()
knn.train(train, cv.ml.ROW_SAMPLE, train_labels)
# Create the trackbars.
cv.createTrackbar(right,'image',0,9,nothing) # correct digit for the drawn sample
cv.createTrackbar(append,'image',0,1,nothing) # add the drawing to the training set
cv.createTrackbar(distinguish,'image',0,1,nothing) # recognise the drawn digit
cv.createTrackbar(clear,'image',0,1,nothing) # clear the canvas
cv.createTrackbar(accuracy, 'image',0,1,nothing) # compute the recognition rate
cv.setMouseCallback('image',draw)
img[:] = (0,0,0) # start with a black canvas
def _canvas_to_sample(canvas):
    """Shrink the BGR canvas to 20x20 grayscale and flatten it to shape (1, 400)."""
    small = cv.resize(canvas, (20, 20))
    return cv.cvtColor(small, cv.COLOR_BGR2GRAY).reshape(-1, 400)


# Event loop: show the canvas and react to the trackbar "buttons" until ESC.
while True:
    cv.imshow('image',img)
    if cv.waitKey(1)&0xFF == 27:
        break
    ac = cv.getTrackbarPos(accuracy,'image')
    c = cv.getTrackbarPos(clear,'image')
    d = cv.getTrackbarPos(distinguish,'image')
    a = cv.getTrackbarPos(append,'image')
    # Measure the recognition rate on the stored test set.
    if ac == 1:
        cv.setTrackbarPos(accuracy, 'image', 0)
        # Reload the archive; the context manager closes the file handle.
        with np.load(filepath) as data:
            train = data['train'].astype(np.float32)
            train_labels = data['train_labels'].astype(np.float32)
            test = data['test'].astype(np.float32)
        test_labels = train_labels.copy()
        # Keep the model in sync with the arrays that were just reloaded.
        knn.train(train, cv.ml.ROW_SAMPLE, train_labels)
        ret, result, neighbours, dist = knn.findNearest(test, k=1)
        matches = result == test_labels
        correct = np.count_nonzero(matches)  # number of correct predictions
        accura = correct * 100.0 / result.size
        print('{:.3f}'.format(accura))
    # Clear the canvas.
    if c == 1:
        cv.setTrackbarPos(clear, 'image', 0)
        img[:] = (0,0,0)
    # Recognise the drawn digit.
    if d == 1:
        cv.setTrackbarPos(distinguish,'image',0)
        # Blank the label area first so a previously drawn result is not
        # classified together with the digit (the append branch does the same).
        img[0:50,0:50] = (0,0,0)
        x = _canvas_to_sample(img).astype(np.float32)
        ret, result, neighbours, dist = knn.findNearest(x, k=5)
        cv.putText(img, str(result[0][0]), (5, 25), cv.FONT_HERSHEY_COMPLEX_SMALL, 1, (100, 200, 200), 1)
    # Add the drawing, labelled with the 'right' trackbar value, to the data set.
    if a == 1:
        img[0:50,0:50] = (0,0,0)  # remove the result label before sampling
        cv.setTrackbarPos(append,'image',0)
        r = cv.getTrackbarPos(right, 'image')
        print('已加入数字 ', str(r))
        r = np.array([[r]]).astype(np.float32)
        x = _canvas_to_sample(img).astype(np.float32)
        # Keep the in-memory arrays in float32, which is what knn.train is
        # given at startup; only the copies written to disk are uint8.
        train = np.append(train, x, axis=0).astype(np.float32)
        train_labels = np.append(train_labels, r, axis=0).astype(np.float32)
        test = np.append(test, x, axis=0).astype(np.float32)
        test_labels = train_labels.copy()
        # Store the enlarged data set.
        np.savez(filepath,
                 train=train.astype(np.uint8),
                 train_labels=train_labels.astype(np.uint8),
                 test=test.astype(np.uint8))
        # Retrain so the new sample influences recognition immediately
        # instead of only after the program is restarted.
        knn.train(train, cv.ml.ROW_SAMPLE, train_labels)
cv.destroyAllWindows()
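备注:运行这个程序之前一定要先运行第一个程序,并且要执行上面转换为np.uint8、同时保存了test的那段保存代码,生成knn_data.npz;然后把程序里的filepath改成该文件的实际路径。否则np.load会因为找不到文件或文件中缺少'test'数据而报错。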
更多推荐
已为社区贡献3条内容
所有评论(0)