Handwritten digit image recognition with a convolutional neural network, implemented in Python with a GUI

Posted: 2023-09-27 10:06:33 · Views: 59
Sure. Below is a Python implementation of handwritten digit recognition based on a convolutional neural network, together with a Tkinter GUI:

```python
import numpy as np
import tkinter as tk
from PIL import Image


# Load the MNIST dataset from four pre-saved .npy files
def load_data():
    train_data = np.load('mnist_train_data.npy')
    train_label = np.load('mnist_train_label.npy')
    test_data = np.load('mnist_test_data.npy')
    test_label = np.load('mnist_test_label.npy')
    return train_data, train_label, test_data, test_label


# Convolutional neural network implemented from scratch with NumPy
class CNN:
    def __init__(self):
        self.conv1_filters = 8
        self.conv1_kernel = 3
        self.conv2_filters = 16
        self.conv2_kernel = 3
        self.learning_rate = 0.01
        self.batch_size = 32
        self.epochs = 10
        self.input_shape = (28, 28, 1)
        self.output_shape = 10
        # After two 2x2 poolings a 28x28 input becomes 7x7, so the flattened
        # feature vector fed to the dense layer has 7 * 7 * conv2_filters elements.
        self.flatten_units = 7 * 7 * self.conv2_filters
        self.conv1_weights = np.random.randn(self.conv1_kernel, self.conv1_kernel,
                                             self.input_shape[-1], self.conv1_filters) * 0.1
        self.conv1_bias = np.zeros((1, 1, 1, self.conv1_filters))
        self.conv2_weights = np.random.randn(self.conv2_kernel, self.conv2_kernel,
                                             self.conv1_filters, self.conv2_filters) * 0.1
        self.conv2_bias = np.zeros((1, 1, 1, self.conv2_filters))
        self.dense_weights = np.random.randn(self.flatten_units, self.output_shape) * 0.1
        self.dense_bias = np.zeros((1, self.output_shape))
        # Intermediate activations cached by forward() and reused by backward()
        self.cache = {}

    def relu(self, x):
        return np.maximum(x, 0)

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    # "Same" convolution: the spatial size of the output equals the input
    def convolution(self, x, w, b):
        h, _, _, out_channels = w.shape
        pad = (h - 1) // 2
        x_pad = np.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')
        conv = np.zeros((x.shape[0], x.shape[1], x.shape[2], out_channels))
        for i in range(x.shape[1]):
            for j in range(x.shape[2]):
                for k in range(out_channels):
                    conv[:, i, j, k] = np.sum(
                        x_pad[:, i:i + h, j:j + h, :] * w[:, :, :, k], axis=(1, 2, 3))
        return conv + b

    # Gradients of the "same" convolution w.r.t. weights, bias and input
    def convolution_backward(self, d_out, x, w):
        h, _, _, out_channels = w.shape
        pad = (h - 1) // 2
        x_pad = np.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), 'constant')
        dw = np.zeros_like(w)
        dx_pad = np.zeros_like(x_pad)
        for i in range(x.shape[1]):
            for j in range(x.shape[2]):
                for k in range(out_channels):
                    grad = d_out[:, i, j, k][:, None, None, None]
                    dw[:, :, :, k] += np.sum(x_pad[:, i:i + h, j:j + h, :] * grad, axis=0)
                    dx_pad[:, i:i + h, j:j + h, :] += w[:, :, :, k] * grad
        db = np.sum(d_out, axis=(0, 1, 2), keepdims=True)
        dx = dx_pad[:, pad:x.shape[1] + pad, pad:x.shape[2] + pad, :]
        return dw, db, dx

    def max_pooling(self, x, pool_size=(2, 2)):
        h, w = pool_size
        pool = np.zeros((x.shape[0], x.shape[1] // h, x.shape[2] // w, x.shape[3]))
        for i in range(pool.shape[1]):
            for j in range(pool.shape[2]):
                pool[:, i, j, :] = np.max(x[:, i * h:i * h + h, j * w:j * w + w, :], axis=(1, 2))
        return pool

    # Route the pooled gradient back to the position of the maximum in each window
    def max_pooling_backward(self, d_pool, x, pool, pool_size=(2, 2)):
        h, w = pool_size
        dx = np.zeros_like(x)
        for i in range(pool.shape[1]):
            for j in range(pool.shape[2]):
                window = x[:, i * h:i * h + h, j * w:j * w + w, :]
                mask = (window == pool[:, i:i + 1, j:j + 1, :])
                dx[:, i * h:i * h + h, j * w:j * w + w, :] += mask * d_pool[:, i:i + 1, j:j + 1, :]
        return dx

    def forward(self, x):
        conv1 = self.convolution(x, self.conv1_weights, self.conv1_bias)
        relu1 = self.relu(conv1)
        pool1 = self.max_pooling(relu1)
        conv2 = self.convolution(pool1, self.conv2_weights, self.conv2_bias)
        relu2 = self.relu(conv2)
        pool2 = self.max_pooling(relu2)
        flatten = np.reshape(pool2, (pool2.shape[0], -1))
        dense = np.dot(flatten, self.dense_weights) + self.dense_bias
        probs = self.softmax(dense)
        self.cache = {'conv1': conv1, 'relu1': relu1, 'pool1': pool1,
                      'conv2': conv2, 'relu2': relu2, 'pool2': pool2, 'flatten': flatten}
        return probs

    # Backpropagation for softmax + cross-entropy loss (labels are one-hot)
    def backward(self, x, y, y_pred):
        n = len(x)
        c = self.cache
        # Dense layer
        d_dense = (y_pred - y) / n
        dense_grad = np.dot(c['flatten'].T, d_dense)
        dense_bias_grad = np.sum(d_dense, axis=0, keepdims=True)
        d_flatten = np.dot(d_dense, self.dense_weights.T)
        # Second conv block: pool2 <- relu2 <- conv2
        d_pool2 = np.reshape(d_flatten, c['pool2'].shape)
        d_relu2 = self.max_pooling_backward(d_pool2, c['relu2'], c['pool2'])
        d_conv2 = d_relu2 * (c['conv2'] > 0)
        conv2_grad, conv2_bias_grad, d_pool1 = self.convolution_backward(
            d_conv2, c['pool1'], self.conv2_weights)
        # First conv block: pool1 <- relu1 <- conv1
        d_relu1 = self.max_pooling_backward(d_pool1, c['relu1'], c['pool1'])
        d_conv1 = d_relu1 * (c['conv1'] > 0)
        conv1_grad, conv1_bias_grad, _ = self.convolution_backward(
            d_conv1, x, self.conv1_weights)
        return (dense_grad, dense_bias_grad, conv1_grad, conv1_bias_grad,
                conv2_grad, conv2_bias_grad)

    def train(self, x_train, y_train, x_val, y_val):
        num_batches = len(x_train) // self.batch_size
        for epoch in range(self.epochs):
            print('Epoch {}/{}'.format(epoch + 1, self.epochs))
            for batch in range(num_batches):
                x_batch = x_train[batch * self.batch_size:(batch + 1) * self.batch_size]
                y_batch = y_train[batch * self.batch_size:(batch + 1) * self.batch_size]
                y_pred = self.forward(x_batch)
                (dense_grad, dense_bias_grad, conv1_grad, conv1_bias_grad,
                 conv2_grad, conv2_bias_grad) = self.backward(x_batch, y_batch, y_pred)
                self.dense_weights -= self.learning_rate * dense_grad
                self.dense_bias -= self.learning_rate * dense_bias_grad
                self.conv1_weights -= self.learning_rate * conv1_grad
                self.conv1_bias -= self.learning_rate * conv1_bias_grad
                self.conv2_weights -= self.learning_rate * conv2_grad
                self.conv2_bias -= self.learning_rate * conv2_bias_grad
            y_train_pred = self.predict(x_train)
            y_val_pred = self.predict(x_val)
            train_acc = np.mean(np.argmax(y_train, axis=1) == np.argmax(y_train_pred, axis=1))
            val_acc = np.mean(np.argmax(y_val, axis=1) == np.argmax(y_val_pred, axis=1))
            print('Train accuracy: {}, Validation accuracy: {}'.format(train_acc, val_acc))

    def predict(self, x):
        return self.forward(x)


# Tkinter GUI: draw a digit on the canvas and let the network classify it
class GUI:
    def __init__(self, cnn):
        self.cnn = cnn
        self.window = tk.Tk()
        self.window.title('Handwritten Digit Recognition')
        self.canvas = tk.Canvas(self.window, width=200, height=200, bg='white')
        self.canvas.grid(row=0, column=0, padx=10, pady=10)
        self.canvas.bind('<B1-Motion>', self.draw)
        self.button_recognize = tk.Button(self.window, text='Recognize', command=self.recognize)
        self.button_recognize.grid(row=0, column=1, padx=10, pady=10)
        self.button_clear = tk.Button(self.window, text='Clear', command=self.clear)
        self.button_clear.grid(row=1, column=1, padx=10, pady=10)
        self.label_result = tk.Label(self.window, text='Please draw a digit', font=('Helvetica', 18))
        self.label_result.grid(row=1, column=0, padx=10, pady=10)

    def draw(self, event):
        x, y, r = event.x, event.y, 8
        self.canvas.create_oval(x - r, y - r, x + r, y + r, fill='black')

    def clear(self):
        self.canvas.delete('all')
        self.label_result.config(text='Please draw a digit')

    def recognize(self):
        # Export the canvas as EPS and rasterise it with Pillow (requires Ghostscript)
        self.canvas.postscript(file='tmp.eps', colormode='color')
        image = Image.open('tmp.eps').convert('L').resize((28, 28))
        # Invert and scale so the drawing matches the MNIST convention
        # (white digit on black background, pixels in [0, 1]); adjust this
        # if your .npy files were saved with a different preprocessing.
        image = 255 - np.array(image, dtype=np.float32)
        image = (image / 255.0).reshape((1, 28, 28, 1))
        y_pred = self.cnn.predict(image)
        label = np.argmax(y_pred)
        self.label_result.config(text='Result: {}'.format(label))

    def run(self):
        self.window.mainloop()


# Main program
if __name__ == '__main__':
    train_data, train_label, test_data, test_label = load_data()
    cnn = CNN()
    cnn.train(train_data, train_label, test_data, test_label)
    gui = GUI(cnn)
    gui.run()
```

Note that this implementation expects the MNIST dataset to be available as the four `.npy` files read by `load_data()`, and requires the `numpy`, `tkinter` and `Pillow` packages; rasterising the canvas via EPS additionally needs Ghostscript installed. Because the convolutions are computed with plain Python loops, training on the full dataset is very slow, so you may want to start with a subset of the data.
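The four `.npy` files are not provided with the answer above. As a minimal sketch of how they could be generated (assuming TensorFlow is installed and used only to download MNIST; the file names match those read by `load_data()`):

```python
# Sketch: build the four .npy files expected by load_data().
import numpy as np
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

def preprocess(images, labels):
    # Scale pixels to [0, 1] and add a channel axis: (N, 28, 28, 1)
    x = images.astype(np.float32).reshape(-1, 28, 28, 1) / 255.0
    # One-hot encode the labels, since train() compares argmax of label vectors
    y = np.eye(10, dtype=np.float32)[labels]
    return x, y

train_data, train_label = preprocess(x_train, y_train)
test_data, test_label = preprocess(x_test, y_test)

np.save('mnist_train_data.npy', train_data)
np.save('mnist_train_label.npy', train_label)
np.save('mnist_test_data.npy', test_data)
np.save('mnist_test_label.npy', test_label)
```

Storing the labels one-hot keeps them consistent with `train()`, which compares `np.argmax` of the label vectors against `np.argmax` of the predicted probabilities.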
