Implementing backpropagation: train a model on the last 4000 samples, report its results on the test set, and plot the recognition accuracy on the training data and the test data. Parameters: batch: 300; iters_num: 2000; lambda: 0.1
Backpropagation is a widely used algorithm for training neural networks: it computes the gradient of the loss function with respect to the model parameters, and those gradients are then used to optimize the parameters.
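For reference, the gradients that the backward pass below implements can be written compactly (the notation here is mine, chosen to match the variable names in the code: s are the class scores, p the softmax probabilities, a₁ the hidden ReLU activations, and λ the regularization strength `reg`):

```latex
L = -\frac{1}{N}\sum_{i=1}^{N} \log p_{i,y_i}
    + \frac{\lambda}{2}\bigl(\lVert W_1\rVert_F^2 + \lVert W_2\rVert_F^2\bigr),
\qquad
p_{i,k} = \frac{e^{s_{i,k}}}{\sum_j e^{s_{i,j}}}

\frac{\partial L}{\partial s_{i,k}} = \frac{p_{i,k} - \mathbf{1}[k = y_i]}{N},
\qquad
\frac{\partial L}{\partial W_2} = a_1^{\top}\,\frac{\partial L}{\partial s} + \lambda W_2,
\qquad
\frac{\partial L}{\partial b_2} = \sum_i \frac{\partial L}{\partial s_i}

\frac{\partial L}{\partial W_1} = X^{\top}\!\Bigl(\frac{\partial L}{\partial s}\,W_2^{\top} \odot \mathbf{1}[z_1 > 0]\Bigr) + \lambda W_1
```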
Below is a simple Python implementation of backpropagation for a two-layer network:
```python
import numpy as np


class TwoLayerNet:
    """A two-layer fully connected network: affine - ReLU - affine - softmax."""

    def __init__(self, input_size, hidden_size, output_size, std=1e-4):
        # Initialize weights with small random values and biases with zeros.
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def loss(self, X, y, reg):
        """Compute the softmax loss and the parameter gradients for a minibatch (X, y)."""
        N = X.shape[0]
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward pass: affine -> ReLU -> affine -> class scores.
        z1 = X.dot(W1) + b1
        a1 = np.maximum(0, z1)
        scores = a1.dot(W2) + b2

        # Softmax data loss (scores are shifted by their row-wise max for
        # numerical stability) plus L2 regularization.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        correct_logprobs = -np.log(probs[np.arange(N), y])
        data_loss = np.sum(correct_logprobs) / N
        reg_loss = 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
        loss = data_loss + reg_loss

        # Backward pass: propagate the gradient of the softmax loss back
        # through the second affine layer, the ReLU, and the first affine layer.
        grads = {}
        dscores = probs.copy()
        dscores[np.arange(N), y] -= 1
        dscores /= N

        dW2 = a1.T.dot(dscores)
        db2 = np.sum(dscores, axis=0)
        dhidden = dscores.dot(W2.T)
        dhidden[a1 <= 0] = 0          # ReLU gate: gradient flows only where z1 > 0
        dW1 = X.T.dot(dhidden)
        db1 = np.sum(dhidden, axis=0)

        # Add the gradient of the L2 regularization term.
        dW2 += reg * W2
        dW1 += reg * W1

        grads['W1'], grads['b1'] = dW1, db1
        grads['W2'], grads['b2'] = dW2, db2
        return loss, grads

    def train(self, X, y, X_val, y_val,
              learning_rate=1e-3, learning_rate_decay=0.95,
              reg=5e-6, num_iters=100,
              batch_size=200, verbose=False):
        """Train the network with SGD on random minibatches."""
        num_train = X.shape[0]
        iterations_per_epoch = max(num_train // batch_size, 1)

        loss_history = []
        train_acc_history = []
        val_acc_history = []

        for it in range(num_iters):
            # Sample a random minibatch of training data and labels.
            batch_indices = np.random.choice(num_train, batch_size)
            X_batch = X[batch_indices]
            y_batch = y[batch_indices]

            # Compute loss and gradients on the minibatch, then take an SGD step.
            loss, grads = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)
            for param_name in self.params:
                self.params[param_name] -= learning_rate * grads[param_name]

            # Every epoch, record accuracy on the training batch and the
            # validation set, and decay the learning rate.
            if it % iterations_per_epoch == 0:
                train_acc = (self.predict(X_batch) == y_batch).mean()
                val_acc = (self.predict(X_val) == y_val).mean()
                train_acc_history.append(train_acc)
                val_acc_history.append(val_acc)
                if verbose:
                    print('iteration %d / %d: loss %f, train_acc: %f, val_acc: %f'
                          % (it, num_iters, loss, train_acc, val_acc))
                learning_rate *= learning_rate_decay

        return {
            'loss_history': loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
        }

    def predict(self, X):
        """Return the predicted class label for each row of X."""
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        z1 = X.dot(W1) + b1
        a1 = np.maximum(0, z1)
        scores = a1.dot(W2) + b2
        return np.argmax(scores, axis=1)
```
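As a sanity check on the backward pass, the analytic gradients can be compared against centered finite differences. The helper below is my own addition to the answer (the function name `gradient_check` and the toy problem sizes are illustrative, not part of the original code):

```python
import numpy as np

def gradient_check(net, X, y, reg=0.1, num_checks=5, h=1e-5):
    """Compare analytic gradients from net.loss() with numerical estimates."""
    _, grads = net.loss(X, y, reg)
    for name, param in net.params.items():
        for _ in range(num_checks):
            # Pick a random entry of this parameter and perturb it by +/- h.
            ix = tuple(np.random.randint(d) for d in param.shape)
            old = param[ix]
            param[ix] = old + h
            loss_plus, _ = net.loss(X, y, reg)
            param[ix] = old - h
            loss_minus, _ = net.loss(X, y, reg)
            param[ix] = old  # restore the original value
            num_grad = (loss_plus - loss_minus) / (2 * h)
            ana_grad = grads[name][ix]
            rel_err = abs(num_grad - ana_grad) / max(abs(num_grad) + abs(ana_grad), 1e-12)
            print('%s%s: numerical %+.6e, analytic %+.6e, rel. error %.2e'
                  % (name, ix, num_grad, ana_grad, rel_err))

# Example: check the gradients on a tiny random problem.
toy_net = TwoLayerNet(input_size=4, hidden_size=10, output_size=3, std=1e-1)
X_toy = np.random.randn(5, 4)
y_toy = np.random.randint(3, size=5)
gradient_check(toy_net, X_toy, y_toy)
```

Relative errors on the order of 1e-7 or smaller indicate that the backward pass is consistent with the loss.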
Next, we can train and evaluate this network on the given dataset and plot how the training and test accuracy evolve:
```python
import matplotlib.pyplot as plt
# Load data (load_data() is a placeholder: it is assumed to return the
# training and test arrays X_train, y_train, X_test, y_test)
X_train, y_train, X_test, y_test = load_data()
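# The question specifies training on the last 4000 samples only; slice them
# off here (this assumes load_data() returns the full training arrays in order).
X_train, y_train = X_train[-4000:], y_train[-4000:]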
# Initialize model
input_size = X_train.shape[1]
hidden_size = 50
num_classes = 10
net = TwoLayerNet(input_size, hidden_size, num_classes)
# Train model
stats = net.train(X_train, y_train, X_test, y_test,
num_iters=2000, batch_size=300,
learning_rate=1e-3, learning_rate_decay=0.95,
reg=0.1, verbose=True)
# Plot the loss function and train / validation accuracies
plt.subplot(2, 1, 1)
plt.plot(stats['loss_history'])
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
plt.plot(stats['train_acc_history'], label='train')
plt.plot(stats['val_acc_history'], label='val')
plt.title('Classification accuracy history')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend()
plt.show()
```
Running the code above produces the curves of training accuracy and test accuracy over the course of training.
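To also report the model's final result on the test set, as the question asks, the accuracy can be computed directly with `predict` (a small addition to the original answer):

```python
# Final accuracy of the trained model on the test set.
test_acc = (net.predict(X_test) == y_test).mean()
print('final test accuracy: %.4f' % test_acc)
```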