手写数字识别使用卷积神经网络纯numpy实现
时间: 2023-12-04 13:04:19 浏览: 142
手写数字识别使用卷积神经网络纯numpy实现需要经过以下步骤:
1.准备数据集:使用MNIST数据集,其中包含60,000个训练图像和10,000个测试图像。
2.数据预处理:将图像像素值除以255,归一化到[0,1]区间,并将标签数据进行one-hot编码。
3.构建卷积神经网络模型:使用numpy实现卷积层、池化层、全连接层和softmax层。
4.训练模型:使用训练集对模型进行训练。
5.测试模型:使用测试集对模型进行测试,并计算分类准确率。
下面是一个具体的代码实现:
```python
import numpy as np
import mnist  # pip install mnist  (python-mnist exposes a different API: `from mnist import MNIST`)
# 数据预处理
def normalize(x):
    """Scale raw pixel intensities by 1/255.

    Args:
        x: Scalar or array-like of pixel values (e.g. a uint8 image array).

    Returns:
        A floating-point NumPy array (or NumPy scalar) equal to ``x / 255``.
        The input is not modified.
    """
    # np.asarray lets plain lists/tuples through as well as ndarrays; the
    # true division always produces a floating-point result.
    return np.asarray(x) / 255.0
def one_hot(y, num_classes=10):
    """One-hot encode integer class labels.

    Args:
        y: Integer label or array-like of integer labels, each in
            ``[0, num_classes)``.
        num_classes: Width of the encoding; defaults to 10.

    Returns:
        Float array of shape ``y.shape + (num_classes,)`` with a single 1.0
        per label.

    Raises:
        IndexError: If any label lies outside ``[0, num_classes)``. Negative
            labels are rejected explicitly because NumPy indexing would
            otherwise wrap them around to the last classes.
    """
    labels = np.asarray(y)
    if labels.size == 0:
        # Nothing to encode; keep the leading shape and append the class axis.
        return np.zeros(labels.shape + (num_classes,))
    if labels.min() < 0 or labels.max() >= num_classes:
        raise IndexError(
            "labels must lie in [0, {}), got range [{}, {}]".format(
                num_classes, labels.min(), labels.max()
            )
        )
    # Row i of the identity matrix is the one-hot vector for class i.
    return np.eye(num_classes)[labels]
# Load each MNIST split and preprocess it in one step:
# images are divided by 255 via normalize(), labels are one-hot encoded.
train_images = normalize(mnist.train_images())
train_labels = one_hot(mnist.train_labels())
test_images = normalize(mnist.test_images())
test_labels = one_hot(mnist.test_labels())
# 构建卷积神经网络模型
class ConvolutionalNeuralNetwork:
    """Small CNN for 28x28 grayscale images, implemented with NumPy only.

    Architecture::

        conv 3x3 (8 filters)  -> ReLU -> max-pool 2x2
        conv 3x3 (16 filters) -> ReLU -> max-pool 2x2
        dense (128 units)     -> ReLU
        dense (10 units)      -> softmax

    Convolutions use no padding, so the spatial size shrinks
    28 -> 26 -> 13 -> 11 -> 5 and the first dense layer sees 16*5*5 inputs.
    Feature maps are stored channel-first, i.e. ``(channels, height, width)``;
    convolution kernels are ``(filters, in_channels, k, k)``.
    """

    _INPUT_SIDE = 28  # height and width of one input image
    _POOL_SIZE = 2    # window (and stride) of both max-pool layers

    def __init__(self):
        self.conv1_filters = 8
        self.conv1_filter_size = 3
        self.conv2_filters = 16
        self.conv2_filter_size = 3
        self.fc1_units = 128
        self.output_units = 10

        # Derive the flattened feature size from the layer geometry instead of
        # hard-coding it, so it always matches what forward() produces.
        side = (self._INPUT_SIDE - self.conv1_filter_size + 1) // self._POOL_SIZE
        side = (side - self.conv2_filter_size + 1) // self._POOL_SIZE
        flat_units = self.conv2_filters * side * side

        self.conv1_weights = self._he_normal(
            (self.conv1_filters, 1,
             self.conv1_filter_size, self.conv1_filter_size)
        )
        self.conv1_bias = np.zeros((self.conv1_filters,))
        self.conv2_weights = self._he_normal(
            (self.conv2_filters, self.conv1_filters,
             self.conv2_filter_size, self.conv2_filter_size)
        )
        self.conv2_bias = np.zeros((self.conv2_filters,))
        self.fc1_weights = self._he_normal((flat_units, self.fc1_units))
        self.fc1_bias = np.zeros((self.fc1_units,))
        self.output_weights = self._he_normal(
            (self.fc1_units, self.output_units)
        )
        self.output_bias = np.zeros((self.output_units,))

    @staticmethod
    def _he_normal(shape):
        """Draw weights from N(0, 2 / fan_in) so ReLU activations stay O(1)."""
        # Dense weights are (in, out); conv kernels are (out, in, k, k).
        fan_in = shape[0] if len(shape) == 2 else int(np.prod(shape[1:]))
        return np.random.randn(*shape) * np.sqrt(2.0 / fan_in)

    def conv2d(self, x, weight, bias):
        """Valid (no padding, stride 1) 2-D cross-correlation.

        Args:
            x: Input of shape ``(H, W)`` or ``(C, H, W)``.
            weight: Kernel(s) of shape ``(k, k)``, ``(F, k, k)`` (single input
                channel) or ``(F, C, k, k)``.
            bias: Scalar, or array with one entry per filter.

        Returns:
            ``(H-k+1, W-k+1)`` when both ``x`` and ``weight`` are 2-D,
            otherwise ``(F, H-k+1, W-k+1)``.

        Raises:
            ValueError: On mismatched channel counts or a kernel larger than
                the input.
        """
        x = np.asarray(x, dtype=float)
        weight = np.asarray(weight, dtype=float)
        # A single 2-D kernel on a 2-D image yields a plain 2-D map.
        squeeze = x.ndim == 2 and weight.ndim == 2
        if x.ndim == 2:
            x = x[np.newaxis]
        if weight.ndim == 2:
            weight = weight[np.newaxis, np.newaxis]
        elif weight.ndim == 3:
            weight = weight[:, np.newaxis]
        if x.ndim != 3 or weight.ndim != 4:
            raise ValueError("conv2d expects x of rank 2/3 and weight of rank 2/3/4")
        if weight.shape[1] != x.shape[0]:
            raise ValueError(
                "kernel expects {} input channel(s) but x has {}".format(
                    weight.shape[1], x.shape[0]
                )
            )

        n_filters, _, k_h, k_w = weight.shape
        out_h = x.shape[1] - k_h + 1
        out_w = x.shape[2] - k_w + 1
        if out_h <= 0 or out_w <= 0:
            raise ValueError("kernel is larger than the input")

        output = np.zeros((n_filters, out_h, out_w))
        # Loop over the k*k kernel offsets (not over output pixels): each
        # offset contributes a shifted copy of the input, mixed across
        # channels by the (F, C) slice of the kernel.
        for di in range(k_h):
            for dj in range(k_w):
                patch = x[:, di:di + out_h, dj:dj + out_w]
                output += np.tensordot(
                    weight[:, :, di, dj], patch, axes=([1], [0])
                )
        output += np.asarray(bias, dtype=float).reshape(-1, 1, 1)
        return output[0] if squeeze else output

    def max_pool2d(self, x, size):
        """Non-overlapping max pooling with a ``size`` x ``size`` window.

        Args:
            x: Input of shape ``(H, W)`` or ``(C, H, W)``.
            size: Window edge length (also the stride).

        Returns:
            Array of shape ``(H // size, W // size)`` or
            ``(C, H // size, W // size)``. Rows/columns that do not fill a
            complete window are ignored.
        """
        x = np.asarray(x, dtype=float)
        squeeze = x.ndim == 2
        if squeeze:
            x = x[np.newaxis]
        channels, height, width = x.shape
        out_h, out_w = height // size, width // size
        trimmed = x[:, :out_h * size, :out_w * size]
        # Split each spatial axis into (window index, offset in window) and
        # reduce over the two offset axes.
        pooled = trimmed.reshape(channels, out_h, size, out_w, size).max(axis=(2, 4))
        return pooled[0] if squeeze else pooled

    def relu(self, x):
        """Element-wise ``max(x, 0)``."""
        return np.maximum(x, 0)

    def softmax(self, x):
        """Softmax over all elements of ``x``.

        The maximum is subtracted first so that large logits do not overflow
        ``exp``; this does not change the result mathematically.
        """
        x = np.asarray(x, dtype=float)
        exp_x = np.exp(x - np.max(x))
        return exp_x / np.sum(exp_x)

    def _forward_pass(self, x):
        """Run one image through the network and keep every intermediate.

        Returns a dict with the activations that backpropagation needs.
        """
        image = np.asarray(x, dtype=float).reshape(
            (1, self._INPUT_SIDE, self._INPUT_SIDE)
        )
        conv1 = self.conv2d(image, self.conv1_weights, self.conv1_bias)
        act1 = self.relu(conv1)
        pool1 = self.max_pool2d(act1, self._POOL_SIZE)
        conv2 = self.conv2d(pool1, self.conv2_weights, self.conv2_bias)
        act2 = self.relu(conv2)
        pool2 = self.max_pool2d(act2, self._POOL_SIZE)
        flat = pool2.reshape((-1,))
        fc1 = np.dot(flat, self.fc1_weights) + self.fc1_bias
        fc1_act = self.relu(fc1)
        logits = np.dot(fc1_act, self.output_weights) + self.output_bias
        return {
            "image": image,
            "conv1": conv1, "act1": act1, "pool1": pool1,
            "conv2": conv2, "act2": act2, "pool2": pool2,
            "flat": flat, "fc1": fc1, "fc1_act": fc1_act,
            "probs": self.softmax(logits),
        }

    def forward(self, x):
        """Return the 10 class probabilities for one image of 784 pixels."""
        return self._forward_pass(x)["probs"]

    def _max_pool_backward(self, x, pooled, grad, size):
        """Route ``grad`` (w.r.t. the pooled map) back to the pool input.

        Each window's gradient goes to the position(s) holding the window
        maximum; if several positions tie, each of them receives it.
        """
        _, out_h, out_w = pooled.shape
        grad_x = np.zeros_like(x)
        region = x[:, :out_h * size, :out_w * size]
        # Blow the pooled maps back up so they align with the input pixels.
        up_pooled = pooled.repeat(size, axis=1).repeat(size, axis=2)
        up_grad = grad.repeat(size, axis=1).repeat(size, axis=2)
        grad_x[:, :out_h * size, :out_w * size] = (region == up_pooled) * up_grad
        return grad_x

    def _conv_backward(self, x, weight, delta, need_input_grad=True):
        """Gradients of a valid convolution.

        Args:
            x: Layer input, ``(C, H, W)``.
            weight: Kernels, ``(F, C, k, k)``.
            delta: Gradient w.r.t. the layer output, ``(F, H-k+1, W-k+1)``.
            need_input_grad: Skip the input gradient when it is not used.

        Returns:
            ``(grad_weight, grad_bias, grad_x)``; ``grad_x`` is ``None`` when
            ``need_input_grad`` is false.
        """
        _, _, k_h, k_w = weight.shape
        _, out_h, out_w = delta.shape
        grad_w = np.zeros_like(weight)
        grad_x = np.zeros_like(x) if need_input_grad else None
        for di in range(k_h):
            for dj in range(k_w):
                patch = x[:, di:di + out_h, dj:dj + out_w]
                # dL/dw[f, c, di, dj] = sum_ij delta[f, i, j] * x[c, i+di, j+dj]
                grad_w[:, :, di, dj] = np.tensordot(
                    delta, patch, axes=([1, 2], [1, 2])
                )
                if need_input_grad:
                    # dL/dx[c, i+di, j+dj] += sum_f w[f, c, di, dj] * delta[f, i, j]
                    grad_x[:, di:di + out_h, dj:dj + out_w] += np.tensordot(
                        weight[:, :, di, dj], delta, axes=([0], [0])
                    )
        return grad_w, delta.sum(axis=(1, 2)), grad_x

    def _gradients(self, image, label):
        """Cross-entropy loss and parameter gradients for a single sample.

        Returns:
            ``(loss, grads)`` where ``grads`` maps each parameter attribute
            name to the gradient of the loss w.r.t. that parameter.
        """
        cache = self._forward_pass(image)
        probs = cache["probs"]
        # Clip so that log() never sees an exact zero.
        loss = -float(np.sum(label * np.log(np.clip(probs, 1e-12, None))))

        # Softmax combined with cross-entropy gives this simple logit gradient.
        output_delta = probs - label
        fc1_delta = np.dot(output_delta, self.output_weights.T) * (cache["fc1"] > 0)

        pool2_grad = np.dot(fc1_delta, self.fc1_weights.T).reshape(
            cache["pool2"].shape
        )
        act2_grad = self._max_pool_backward(
            cache["act2"], cache["pool2"], pool2_grad, self._POOL_SIZE
        )
        conv2_delta = act2_grad * (cache["conv2"] > 0)
        conv2_w_grad, conv2_b_grad, pool1_grad = self._conv_backward(
            cache["pool1"], self.conv2_weights, conv2_delta
        )

        act1_grad = self._max_pool_backward(
            cache["act1"], cache["pool1"], pool1_grad, self._POOL_SIZE
        )
        conv1_delta = act1_grad * (cache["conv1"] > 0)
        conv1_w_grad, conv1_b_grad, _ = self._conv_backward(
            cache["image"], self.conv1_weights, conv1_delta,
            need_input_grad=False,
        )

        grads = {
            "conv1_weights": conv1_w_grad,
            "conv1_bias": conv1_b_grad,
            "conv2_weights": conv2_w_grad,
            "conv2_bias": conv2_b_grad,
            "fc1_weights": np.outer(cache["flat"], fc1_delta),
            "fc1_bias": fc1_delta,
            "output_weights": np.outer(cache["fc1_act"], output_delta),
            "output_bias": output_delta,
        }
        return loss, grads

    def train(self, x, y, learning_rate):
        """Take one gradient-descent step on a sample or a mini-batch.

        Args:
            x: One image (784 values in any shape) or a batch shaped
                ``(N, 28, 28)`` / ``(N, 784)``.
            y: Matching one-hot label(s), ``(10,)`` or ``(N, 10)``.
            learning_rate: Step size applied to the batch-averaged gradient.

        Returns:
            Mean cross-entropy loss of the batch, measured before the update.
            Returns 0.0 without touching the weights for an empty batch.

        Raises:
            ValueError: If ``x`` / ``y`` cannot be arranged into matching
                image and label batches.
        """
        x = np.asarray(x, dtype=float)
        if x.size == 0:
            return 0.0
        images = x.reshape((-1, self._INPUT_SIDE, self._INPUT_SIDE))
        n_samples = images.shape[0]
        labels = np.asarray(y, dtype=float).reshape((n_samples, -1))
        if labels.shape[1] != self.output_units:
            raise ValueError(
                "expected {} label values per sample, got {}".format(
                    self.output_units, labels.shape[1]
                )
            )

        total_loss = 0.0
        totals = None
        # All gradients are computed with the weights as they were at the
        # start of the step; the update is applied once at the end.
        for image, label in zip(images, labels):
            loss, grads = self._gradients(image, label)
            total_loss += loss
            if totals is None:
                totals = grads
            else:
                for name, grad in grads.items():
                    totals[name] += grad

        step = learning_rate / n_samples
        for name, grad in totals.items():
            param = getattr(self, name)
            param -= step * grad
        return total_loss / n_samples

    def predict(self, x):
        """Return class probabilities, shape ``(len(x), 10)``, for a batch."""
        x = np.asarray(x, dtype=float)
        y_pred = np.zeros((len(x), self.output_units))
        for i, image in enumerate(x):
            y_pred[i] = self.forward(image)
        return y_pred
# Train the model with mini-batch gradient descent.
model = ConvolutionalNeuralNetwork()
batch_size = 32
learning_rate = 0.1
num_epochs = 5
num_samples = len(train_images)
# Ceiling division so a final, smaller batch is not silently dropped.
num_batches = (num_samples + batch_size - 1) // batch_size
for epoch in range(num_epochs):
    # Visit the samples in a fresh random order every epoch.
    order = np.random.permutation(num_samples)
    for batch in range(num_batches):
        batch_start = batch * batch_size
        batch_end = (batch + 1) * batch_size
        batch_idx = order[batch_start:batch_end]
        x_batch = train_images[batch_idx]
        y_batch = train_labels[batch_idx]
        model.train(x_batch, y_batch, learning_rate)
    # Report accuracy over the whole training set once per epoch.
    y_pred = model.predict(train_images)
    accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(train_labels, axis=1))
    print("Epoch: {}, Accuracy: {:.3f}".format(epoch+1, accuracy))
# Evaluate the trained model on the held-out test set.
y_pred = model.predict(test_images)
accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(test_labels, axis=1))
print("Test Accuracy: {:.3f}".format(accuracy))
```
这个代码实现了一个包含2个卷积层、2个池化层和2个全连接层(128个单元的隐藏层和10个单元的softmax输出层)的卷积神经网络模型,并使用MNIST数据集进行训练和测试。你可以根据自己的需求修改模型结构和训练参数。
阅读全文