C++实现MNIST识别
时间: 2023-11-26 19:06:07 浏览: 102
要使用C++实现MNIST识别,需要以下步骤:
1. 下载MNIST数据集,包括训练集和测试集。
2. 将数据集转换为C++程序可以读取的格式。可以将图片数据转换为数组,标签数据转换为向量,保存到文件中。
3. 设计神经网络模型,可以选择使用已有的框架(如TensorFlow、Caffe等),或者自己设计模型。
4. 定义损失函数,常用的是交叉熵损失函数。
5. 使用梯度下降等优化算法,训练模型。
6. 对测试集进行预测,计算准确率。
以下是一个简单的示例代码:
```C++
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <limits>
#include <string>
#include <vector>
using namespace std;
// Network topology: the layer widths used throughout the program.
constexpr int input_size = 28 * 28;  // 784 inputs, one per pixel of a 28x28 image
constexpr int hidden_size = 30;      // units in the single hidden layer
constexpr int output_size = 10;      // one output unit per class label
// Logistic activation function: returns 1 / (1 + e^(-x)).
double sigmoid(double x) {
    const double decay = std::exp(-x);
    return 1.0 / (1.0 + decay);
}
// 定义神经网络类
class NeuralNetwork {
public:
NeuralNetwork() {
// 初始化参数
w1.resize(input_size, vector<double>(hidden_size));
w2.resize(hidden_size, vector<double>(output_size));
b1.resize(hidden_size);
b2.resize(output_size);
for (int i = 0; i < input_size; ++i) {
for (int j = 0; j < hidden_size; ++j) {
w1[i][j] = rand() / double(RAND_MAX);
}
}
for (int i = 0; i < hidden_size; ++i) {
for (int j = 0; j < output_size; ++j) {
w2[i][j] = rand() / double(RAND_MAX);
}
}
for (int i = 0; i < hidden_size; ++i) {
b1[i] = rand() / double(RAND_MAX);
}
for (int i = 0; i < output_size; ++i) {
b2[i] = rand() / double(RAND_MAX);
}
}
// 前向传播
vector<double> forward(vector<double> x) {
vector<double> z1(hidden_size);
vector<double> a1(hidden_size);
vector<double> z2(output_size);
vector<double> a2(output_size);
for (int j = 0; j < hidden_size; ++j) {
for (int i = 0; i < input_size; ++i) {
z1[j] += x[i] * w1[i][j];
}
z1[j] += b1[j];
a1[j] = sigmoid(z1[j]);
}
for (int k = 0; k < output_size; ++k) {
for (int j = 0; j < hidden_size; ++j) {
z2[k] += a1[j] * w2[j][k];
}
z2[k] += b2[k];
a2[k] = sigmoid(z2[k]);
}
return a2;
}
// 计算损失函数
double loss(vector<vector<double>> x, vector<vector<double>> y) {
double L = 0.0;
int N = x.size();
for (int n = 0; n < N; ++n) {
vector<double> a2 = forward(x[n]);
for (int k = 0; k < output_size; ++k) {
L += y[n][k] * log(a2[k]) + (1 - y[n][k]) * log(1 - a2[k]);
}
}
return -L / N;
}
// 训练模型
void train(vector<vector<double>> x, vector<vector<double>> y, double learning_rate, int epochs) {
int N = x.size();
for (int epoch = 1; epoch <= epochs; ++epoch) {
double L = 0.0;
for (int n = 0; n < N; ++n) {
// 前向传播
vector<double> z1(hidden_size);
vector<double> a1(hidden_size);
vector<double> z2(output_size);
vector<double> a2(output_size);
for (int j = 0; j < hidden_size; ++j) {
for (int i = 0; i < input_size; ++i) {
z1[j] += x[n][i] * w1[i][j];
}
z1[j] += b1[j];
a1[j] = sigmoid(z1[j]);
}
for (int k = 0; k < output_size; ++k) {
for (int j = 0; j < hidden_size; ++j) {
z2[k] += a1[j] * w2[j][k];
}
z2[k] += b2[k];
a2[k] = sigmoid(z2[k]);
}
// 反向传播
vector<double> delta2(output_size);
for (int k = 0; k < output_size; ++k) {
delta2[k] = a2[k] - y[n][k];
}
vector<double> delta1(hidden_size);
for (int j = 0; j < hidden_size; ++j) {
for (int k = 0; k < output_size; ++k) {
delta1[j] += delta2[k] * w2[j][k] * a1[j] * (1 - a1[j]);
}
}
// 更新参数
for (int j = 0; j < hidden_size; ++j) {
for (int i = 0; i < input_size; ++i) {
w1[i][j] -= learning_rate * delta1[j] * x[n][i];
}
b1[j] -= learning_rate * delta1[j];
}
for (int k = 0; k < output_size; ++k) {
for (int j = 0; j < hidden_size; ++j) {
w2[j][k] -= learning_rate * delta2[k] * a1[j];
}
b2[k] -= learning_rate * delta2[k];
}
L += loss({x[n]}, {y[n]});
}
cout << "Epoch " << epoch << ", Loss: " << L / N << endl;
}
}
// 预测
int predict(vector<double> x) {
vector<double> a2 = forward(x);
int label = 0;
double max_prob = 0.0;
for (int k = 0; k < output_size; ++k) {
if (a2[k] > max_prob) {
max_prob = a2[k];
label = k;
}
}
return label;
}
// 保存模型
void save(string filename) {
ofstream file(filename);
file << input_size << " " << hidden_size << " " << output_size << endl;
for (int i = 0; i < input_size; ++i) {
for (int j = 0; j < hidden_size; ++j) {
file << w1[i][j] << " ";
}
}
for (int j = 0; j < hidden_size; ++j) {
file << b1[j] << " ";
}
for (int j = 0; j < hidden_size; ++j) {
for (int k = 0; k < output_size; ++k) {
file << w2[j][k] << " ";
}
}
for (int k = 0; k < output_size; ++k) {
file << b2[k] << " ";
}
file.close();
}
// 加载模型
void load(string filename) {
ifstream file(filename);
int input_size_, hidden_size_, output_size_;
file >> input_size_ >> hidden_size_ >> output_size_;
if (input_size_ != input_size || hidden_size_ != hidden_size || output_size_ != output_size) {
cerr << "Error: model size mismatch" << endl;
return;
}
for (int i = 0; i < input_size; ++i) {
for (int j = 0; j < hidden_size; ++j) {
file >> w1[i][j];
}
}
for (int j = 0; j < hidden_size; ++j) {
file >> b1[j];
}
for (int j = 0; j < hidden_size; ++j) {
for (int k = 0; k < output_size; ++k) {
file >> w2[j][k];
}
}
for (int k = 0; k < output_size; ++k) {
file >> b2[k];
}
file.close();
}
private:
vector<vector<double>> w1;
vector<vector<double>> w2;
vector<double> b1;
vector<double> b2;
};
// 读取数据集
void read_data(string filename, vector<vector<double>>& x, vector<vector<double>>& y) {
ifstream file(filename);
int magic_number, n;
file.read((char*)&magic_number, 4);
magic_number = __builtin_bswap32(magic_number);
file.read((char*)&n, 4);
n = __builtin_bswap32(n);
x.resize(n);
y.resize(n);
for (int i = 0; i < n; ++i) {
x[i].resize(input_size);
file.read((char*)x[i].data(), input_size);
for (int j = 0; j < input_size; ++j) {
x[i][j] /= 255.0;
}
}
file.close();
string label_filename = filename.substr(0, filename.size() - 3) + "idx1-ubyte";
file.open(label_filename);
file.read((char*)&magic_number, 4);
file.read((char*)&n, 4);
n = __builtin_bswap32(n);
for (int i = 0; i < n; ++i) {
y[i].resize(output_size);
uint8_t label;
file.read((char*)&label, 1);
for (int k = 0; k < output_size; ++k) {
y[i][k] = (k == label ? 1.0 : 0.0);
}
}
file.close();
}
// Entry point: loads the training and test sets, trains the network, saves it
// to "model.txt" and prints the fraction of test samples classified correctly.
// Returns 1 without training when either data set failed to load.
int main() {
    std::vector<std::vector<double>> train_x, train_y, test_x, test_y;
    read_data("train-images-idx3-ubyte", train_x, train_y);
    read_data("t10k-images-idx3-ubyte", test_x, test_y);
    // Without this check a failed load would train on nothing and report an
    // accuracy of 0/0.
    if (train_x.empty() || train_x.size() != train_y.size() ||
        test_x.empty() || test_x.size() != test_y.size()) {
        std::cerr << "Error: failed to load the MNIST data set" << std::endl;
        return 1;
    }

    NeuralNetwork model;
    model.train(train_x, train_y, 0.1, 100);
    model.save("model.txt");

    int correct = 0;
    const int total = static_cast<int>(test_x.size());
    for (int i = 0; i < total; ++i) {
        // The expected class is the position of the 1 in the one-hot target.
        const std::vector<double>& target = test_y[i];
        const int expected =
            static_cast<int>(std::distance(target.begin(), std::max_element(target.begin(), target.end())));
        if (model.predict(test_x[i]) == expected) {
            ++correct;
        }
    }
    std::cout << "Accuracy: " << double(correct) / total << std::endl;
    return 0;
}
```
这段代码实现了一个简单的两层神经网络,使用sigmoid作为激活函数,交叉熵作为损失函数,使用随机梯度下降算法训练模型。读取MNIST数据集时,需要将像素值归一化到0到1之间。训练完成后,可以将模型保存到文件中,以便后续使用。测试时,计算预测准确率。
阅读全文