Help me write some Python code that trains a model on a handwritten digit dataset. The dataset is split into a training set and a test set; the training labels form a 5000-dimensional vector `label` in which the digit "0" is labeled "10" while the digits "1" through "9" carry their natural labels "1" through "9". Use a fully connected neural network for classification: 784 (28*28) input nodes, 12 hidden nodes (including the bias term), and 10 output nodes (including the bias term); both the hidden and output layers use the sigmoid activation, and the loss is the mean squared error. Initialize the weights and thresholds from a standard normal distribution, set the maximum number of gradient-descent iterations to 1000, and compare the loss curves and test-set accuracy for learning rates 0.001, 0.005, and 0.01.
Sure, here is the Python code. Two notes first: sklearn's `load_digits` provides 8×8 images (64 features) rather than the 28×28 (784-feature) MNIST format the question describes, so the code below infers the input size from the data. Also, because the loss is mean squared error with a sigmoid output layer, the output-layer gradient must carry the sigmoid derivative; written out from the question's own definitions (with one-hot targets):
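$$J = \frac{1}{m}\sum_{i=1}^{m}\left\lVert a^{(2)}_i - y_i\right\rVert^2, \qquad \frac{\partial J}{\partial z^{(2)}} = \frac{2}{m}\,\bigl(a^{(2)} - y\bigr)\odot a^{(2)}\odot\bigl(1-a^{(2)}\bigr),$$

using $\sigma'(z) = \sigma(z)\bigl(1-\sigma(z)\bigr)$ for the sigmoid. The `backward_propagation` function below computes exactly this term, with the constant factor 2 folded into the learning rate.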
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the handwritten digit dataset.
# Note: load_digits provides 8x8 images (64 features), not the 28x28 (784)
# MNIST format described in the question; the input size is therefore
# inferred from the data, so the same code works for either format.
digits = load_digits()
X = digits.data / 16.0          # scale the 0..16 pixel values into [0, 1] for stable sigmoid training
labels = digits.target.copy()

# Relabel per the question's convention: digit "0" is labeled "10",
# digits "1" to "9" keep their natural labels
labels[labels == 0] = 10

# Split the dataset into training and test sets
x_train, x_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=0)

# One-hot encode labels 1..10 into row indices 0..9 (label d -> row d-1)
def one_hot(y, n_classes=10):
    Y = np.zeros((n_classes, y.shape[0]))
    Y[y - 1, np.arange(y.shape[0])] = 1
    return Y
# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Initialize weights and thresholds from a standard normal distribution,
# as the question specifies (the original He-style scaling is dropped)
def initialize_parameters(n_x, n_h, n_y):
    np.random.seed(1)
    W1 = np.random.randn(n_h, n_x)
    b1 = np.random.randn(n_h, 1)
    W2 = np.random.randn(n_y, n_h)
    b2 = np.random.randn(n_y, 1)
    return {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
# Forward propagation
def forward_propagation(X, parameters):
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    Z1 = np.dot(W1, X.T) + b1      # X has shape (m, n_x), so Z1 is (n_h, m)
    A1 = sigmoid(Z1)
    Z2 = np.dot(W2, A1) + b2       # Z2 is (n_y, m)
    A2 = sigmoid(Z2)
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache
# Mean squared error loss; Y is one-hot with shape (10, m)
def compute_cost(A2, Y):
    m = Y.shape[1]
    cost = np.sum((A2 - Y) ** 2) / m
    return cost
# Backward propagation. With MSE loss and a sigmoid output layer, the
# output error is (A2 - Y) * sigma'(Z2); the constant factor 2 from the
# squared error is folded into the learning rate.
def backward_propagation(parameters, cache, X, Y):
    m = X.shape[0]
    W2 = parameters["W2"]
    A1, A2 = cache["A1"], cache["A2"]
    dZ2 = (A2 - Y) * A2 * (1 - A2)
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = np.dot(W2.T, dZ2) * A1 * (1 - A1)
    dW1 = np.dot(dZ1, X) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m
    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return grads
# Gradient descent parameter update
def update_parameters(parameters, grads, learning_rate):
    for key in ("W1", "b1", "W2", "b2"):
        parameters[key] -= learning_rate * grads["d" + key]
    return parameters
# Train the model, recording the loss at every iteration so the
# curves can be plotted afterwards
def nn_model(X, y, n_h, num_iterations, learning_rate):
    n_x = X.shape[1]               # 64 for load_digits, 784 for 28x28 MNIST
    n_y = 10
    Y = one_hot(y, n_y)
    parameters = initialize_parameters(n_x, n_h, n_y)
    costs = []
    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y)
        costs.append(cost)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            print("Iteration:", i, ", loss:", cost)
    return parameters, costs
# Predict labels: argmax gives row indices 0..9, and +1 maps them
# back to the labels 1..10
def predict(X, parameters):
    A2, _ = forward_propagation(X, parameters)
    predictions = np.argmax(A2, axis=0) + 1
    return predictions

# Classification accuracy
def accuracy(predictions, y):
    return np.mean(predictions == y)
# Learning rates and iteration count to compare
learning_rates = [0.001, 0.005, 0.01]
num_iterations = 1000

# Train at each learning rate, plot the loss curve, and report accuracy
for lr in learning_rates:
    print("Learning rate:", lr)
    parameters, costs = nn_model(x_train, y_train, 12, num_iterations, lr)
    predictions_train = predict(x_train, parameters)
    predictions_test = predict(x_test, parameters)
    print("Training accuracy:", accuracy(predictions_train, y_train))
    print("Test accuracy:", accuracy(predictions_test, y_test))
    plt.plot(range(num_iterations), costs)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.title("Learning rate = " + str(lr))
    plt.show()
```
Note: to keep the code simple, regularization and similar refinements are omitted. If L2 (weight-decay) regularization were wanted, a minimal sketch of the change follows; `reg_lambda` is a hypothetical hyperparameter that is not part of the original code, and only the weight matrices (not the biases) are penalized, which is the usual convention.
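```python
# Minimal sketch of adding L2 weight decay on top of the functions above
# (an assumption for illustration, not part of the original answer).
def compute_cost_l2(A2, Y, parameters, reg_lambda):
    m = Y.shape[1]
    mse = np.sum((A2 - Y) ** 2) / m
    # penalize only the weight matrices, not the bias vectors
    l2 = reg_lambda / (2 * m) * (np.sum(parameters["W1"] ** 2)
                                 + np.sum(parameters["W2"] ** 2))
    return mse + l2

# The matching change in backward_propagation is one extra term per
# weight gradient:
#   dW1 += reg_lambda / m * parameters["W1"]
#   dW2 += reg_lambda / m * parameters["W2"]
```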