Implement a fully connected neural network consisting of 4 Affine layers with 100, 100, 50, and 10 neurons respectively, 3 ReLU activation layers, and one Softmax layer, trained with a Cross-Entropy Loss function. Update the network parameters with SGD. Use this network to perform recognition on the MNIST dataset.
Sure, I understand your question. Here are some ideas and reference code for this problem.
First, you can implement the fully connected network with Python and NumPy. You need to define a neural network class with an initialization function, a forward-propagation function, and a back-propagation function.
Here is a simple implementation that you can adapt as needed:
```python
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
        # Initialize weights with small random values and biases with zeros
        self.W1 = np.random.randn(input_size, hidden_size1) * 0.01
        self.b1 = np.zeros((1, hidden_size1))
        self.W2 = np.random.randn(hidden_size1, hidden_size2) * 0.01
        self.b2 = np.zeros((1, hidden_size2))
        self.W3 = np.random.randn(hidden_size2, hidden_size3) * 0.01
        self.b3 = np.zeros((1, hidden_size3))
        self.W4 = np.random.randn(hidden_size3, output_size) * 0.01
        self.b4 = np.zeros((1, output_size))

    def forward(self, X):
        # Forward pass: three Affine + ReLU blocks, then a final Affine + Softmax
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = np.maximum(0, self.Z1)  # ReLU activation
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = np.maximum(0, self.Z2)  # ReLU activation
        self.Z3 = np.dot(self.A2, self.W3) + self.b3
        self.A3 = np.maximum(0, self.Z3)  # ReLU activation
        self.Z4 = np.dot(self.A3, self.W4) + self.b4
        # Numerically stable Softmax: subtract the row-wise max before exponentiating
        exp_scores = np.exp(self.Z4 - np.max(self.Z4, axis=1, keepdims=True))
        self.softmax = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def backward(self, X, y, learning_rate):
        # Backward pass; for Softmax + cross-entropy, dL/dZ4 = softmax - y
        m = X.shape[0]
        dZ4 = self.softmax - y
        dW4 = np.dot(self.A3.T, dZ4) / m
        db4 = np.sum(dZ4, axis=0, keepdims=True) / m
        dA3 = np.dot(dZ4, self.W4.T)
        dZ3 = dA3 * (self.A3 > 0)  # ReLU derivative: pass gradient where input > 0
        dW3 = np.dot(self.A2.T, dZ3) / m
        db3 = np.sum(dZ3, axis=0, keepdims=True) / m
        dA2 = np.dot(dZ3, self.W3.T)
        dZ2 = dA2 * (self.A2 > 0)  # ReLU derivative
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * (self.A1 > 0)  # ReLU derivative
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m
        # SGD update of all weights and biases
        self.W4 -= learning_rate * dW4
        self.b4 -= learning_rate * db4
        self.W3 -= learning_rate * dW3
        self.b3 -= learning_rate * db3
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def train(self, X, y, epochs, learning_rate, batch_size=100):
        # Mini-batch SGD: shuffle each epoch, then update on one mini-batch at a time
        n = X.shape[0]
        for epoch in range(epochs):
            perm = np.random.permutation(n)
            X_shuffled, y_shuffled = X[perm], y[perm]
            for start in range(0, n, batch_size):
                X_batch = X_shuffled[start:start + batch_size]
                y_batch = y_shuffled[start:start + batch_size]
                self.forward(X_batch)
                self.backward(X_batch, y_batch, learning_rate)
            # Report the average cross-entropy loss over the full training set
            self.forward(X)
            cost = -np.sum(y * np.log(self.softmax + 1e-8)) / n
            print("Epoch {}: cost = {:.4f}".format(epoch, cost))

    def predict(self, X):
        # Predict the class with the highest Softmax probability
        self.forward(X)
        return np.argmax(self.softmax, axis=1)
```
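A note on the backward pass: the line `dZ4 = self.softmax - y` relies on the identity that, for a Softmax output trained with cross-entropy loss, the gradient of the loss with respect to the logits is simply the predicted probabilities minus the one-hot targets. Below is a quick finite-difference check of that identity on a single logit vector (a standalone sanity-check sketch; the example values are arbitrary and not part of the network class):
```python
import numpy as np

def ce_loss(z, y):
    # Cross-entropy of one logit vector against a one-hot target
    p = np.exp(z - z.max())
    p = p / p.sum()
    return -np.sum(y * np.log(p))

z = np.array([0.5, -1.2, 2.0])   # arbitrary logits
y = np.array([0.0, 0.0, 1.0])    # one-hot target
p = np.exp(z - z.max())
p = p / p.sum()
analytic = p - y                 # the identity used in backward()

# Central finite differences approximate the same gradient numerically
eps = 1e-6
numeric = np.zeros_like(z)
for i in range(len(z)):
    z_plus, z_minus = z.copy(), z.copy()
    z_plus[i] += eps
    z_minus[i] -= eps
    numeric[i] = (ce_loss(z_plus, y) - ce_loss(z_minus, y)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # should be tiny, on the order of 1e-10
```
The same kind of check, applied to individual weights, is a useful way to validate a hand-written backward pass before training.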
Next, you can train and test on the MNIST dataset. Download the four MNIST IDX files and convert them to NumPy arrays. A simple example (the image files have a 16-byte header and the label files an 8-byte header, hence the offsets):
```python
import numpy as np
import gzip

def load_data():
    # Read the raw IDX files, skipping the header bytes
    with gzip.open('mnist/train-images-idx3-ubyte.gz', 'rb') as f:
        X_train = np.frombuffer(f.read(), dtype=np.uint8, offset=16).reshape(-1, 784)
    with gzip.open('mnist/train-labels-idx1-ubyte.gz', 'rb') as f:
        y_train = np.frombuffer(f.read(), dtype=np.uint8, offset=8)
    with gzip.open('mnist/t10k-images-idx3-ubyte.gz', 'rb') as f:
        X_test = np.frombuffer(f.read(), dtype=np.uint8, offset=16).reshape(-1, 784)
    with gzip.open('mnist/t10k-labels-idx1-ubyte.gz', 'rb') as f:
        y_test = np.frombuffer(f.read(), dtype=np.uint8, offset=8)
    return X_train, y_train, X_test, y_test

X_train, y_train, X_test, y_test = load_data()
```
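If the four `.gz` files are not already in a local `mnist/` directory, here is a minimal download sketch; the mirror URL below is an assumption, so substitute whatever MNIST mirror you prefer:
```python
import os
import urllib.request

# Assumed mirror URL: replace with any MNIST mirror you trust
BASE_URL = "https://storage.googleapis.com/cvdf-datasets/mnist/"
FILES = [
    "train-images-idx3-ubyte.gz",
    "train-labels-idx1-ubyte.gz",
    "t10k-images-idx3-ubyte.gz",
    "t10k-labels-idx1-ubyte.gz",
]

os.makedirs("mnist", exist_ok=True)
for name in FILES:
    path = os.path.join("mnist", name)
    if not os.path.exists(path):  # skip files that are already downloaded
        urllib.request.urlretrieve(BASE_URL + name, path)
```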
Next, normalize the pixel values to the range 0 to 1 and convert the labels to one-hot encoding. A simple example:
```python
def preprocess_data(X_train, y_train, X_test, y_test):
    # Scale pixel values from [0, 255] to [0, 1]
    X_train = X_train / 255
    X_test = X_test / 255
    # Convert integer labels to one-hot vectors of length 10
    y_train_onehot = np.zeros((y_train.shape[0], 10))
    y_train_onehot[np.arange(y_train.shape[0]), y_train] = 1
    y_test_onehot = np.zeros((y_test.shape[0], 10))
    y_test_onehot[np.arange(y_test.shape[0]), y_test] = 1
    return X_train, y_train_onehot, X_test, y_test_onehot

X_train, y_train_onehot, X_test, y_test_onehot = preprocess_data(X_train, y_train, X_test, y_test)
```
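Before training, it is worth a quick sanity check of shapes and value ranges; MNIST has 60,000 training images and 10,000 test images of 28x28 = 784 pixels each:
```python
# Quick sanity checks on the preprocessed arrays
assert X_train.shape == (60000, 784) and y_train_onehot.shape == (60000, 10)
assert X_test.shape == (10000, 784) and y_test_onehot.shape == (10000, 10)
assert X_train.min() >= 0.0 and X_train.max() <= 1.0  # normalized pixels
assert np.all(y_train_onehot.sum(axis=1) == 1)  # exactly one hot entry per row
```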
Finally, create a network object and train it on the training data. A simple example (the hyperparameters below are reasonable starting points rather than tuned values):
```python
nn = NeuralNetwork(784, 100, 100, 50, 10)
nn.train(X_train, y_train_onehot, epochs=10, learning_rate=0.1, batch_size=100)
```
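If you want to monitor overfitting while experimenting, one option is to hold out part of the training set as a validation split before training (the 50,000/10,000 split below is a common convention, not something the dataset requires):
```python
# Optional alternative to the call above: train on 50,000 samples and
# keep 10,000 aside for validation (the split sizes are a conventional choice)
X_tr, X_val = X_train[:50000], X_train[50000:]
y_tr, y_val = y_train_onehot[:50000], y_train_onehot[50000:]

nn = NeuralNetwork(784, 100, 100, 50, 10)
nn.train(X_tr, y_tr, epochs=10, learning_rate=0.1, batch_size=100)
val_accuracy = np.mean(nn.predict(X_val) == np.argmax(y_val, axis=1))
print("Validation accuracy = {:.4f}".format(val_accuracy))
```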
After training, you can evaluate the network on the test data and compute its accuracy. A simple example:
```python
y_pred = nn.predict(X_test)
accuracy = np.mean(y_pred == np.argmax(y_test_onehot, axis=1))
print("Accuracy = {}".format(accuracy))
```
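Overall accuracy hides which digits the network confuses with each other; a confusion matrix gives a per-class breakdown (an optional diagnostic sketch):
```python
# Build a 10x10 confusion matrix: rows are true digits, columns are predictions
y_true = np.argmax(y_test_onehot, axis=1)
confusion = np.zeros((10, 10), dtype=int)
for t, p in zip(y_true, y_pred):
    confusion[t, p] += 1

# Per-class accuracy is the diagonal divided by the row totals
per_class = confusion.diagonal() / confusion.sum(axis=1)
for digit, acc in enumerate(per_class):
    print("Digit {}: accuracy = {:.3f}".format(digit, acc))
```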
I hope this code helps you implement the fully connected neural network and apply it to MNIST digit recognition.