c++训练mnist手写数字识别的函数
时间: 2023-12-16 22:03:16 浏览: 168
以下是使用C++实现的一个简单的MNIST手写数字识别程序:
```c++
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// MNIST images are 28x28 grayscale bitmaps.
const int kImageSize = 28;
// Nominal sizes of the MNIST training and test splits.
const int kNumImages = 60000;
const int kNumTestImages = 10000;
// One sample: a 28x28 grid of pixel intensities (scaled to [0, 1] by
// ReadImages) plus the class label read from the labels file.
struct Image {
vector<vector<double>> pixels;
int label;
};
vector<Image> ReadImages(const string& images_file, const string& labels_file) {
vector<Image> images;
ifstream images_in(images_file, ios::binary);
ifstream labels_in(labels_file, ios::binary);
if (images_in.is_open() && labels_in.is_open()) {
int magic_number, num_images, num_rows, num_cols;
images_in.read((char*)&magic_number, sizeof(magic_number));
images_in.read((char*)&num_images, sizeof(num_images));
images_in.read((char*)&num_rows, sizeof(num_rows));
images_in.read((char*)&num_cols, sizeof(num_cols));
labels_in.read((char*)&magic_number, sizeof(magic_number));
labels_in.read((char*)&num_images, sizeof(num_images));
for (int i = 0; i < kNumImages; ++i) {
Image image;
image.pixels.resize(kImageSize);
for (int j = 0; j < kImageSize; ++j) {
image.pixels[j].resize(kImageSize);
for (int k = 0; k < kImageSize; ++k) {
unsigned char pixel;
images_in.read((char*)&pixel, sizeof(pixel));
image.pixels[j][k] = static_cast<double>(pixel) / 255.0;
}
}
unsigned char label;
labels_in.read((char*)&label, sizeof(label));
image.label = static_cast<int>(label);
images.push_back(image);
}
}
return images;
}
// Logistic (sigmoid) activation: maps any real x into the open interval
// (0, 1), with Sigmoid(0) == 0.5.
double Sigmoid(double x) {
    const double neg_exp = std::exp(-x);
    return 1.0 / (1.0 + neg_exp);
}
// Numerically stable softmax: subtracts the max logit before
// exponentiating so exp() cannot overflow for large inputs.
//
// Returns a probability vector of the same length whose entries sum to 1.
// An empty input yields an empty output (the original dereferenced
// max_element on an empty range, which is undefined behavior). Loop
// indices are size_t to avoid signed/unsigned comparison warnings.
std::vector<double> Softmax(const std::vector<double>& logits) {
    std::vector<double> probabilities(logits.size());
    if (logits.empty()) {
        return probabilities;
    }
    const double max_logit = *std::max_element(logits.begin(), logits.end());
    double sum = 0.0;
    for (std::size_t i = 0; i < logits.size(); ++i) {
        probabilities[i] = std::exp(logits[i] - max_logit);
        sum += probabilities[i];
    }
    for (double& p : probabilities) {
        p /= sum;
    }
    return probabilities;
}
// A minimal fully-connected network: inputs -> sigmoid hidden layer ->
// softmax outputs, trained by per-sample stochastic gradient descent on
// the cross-entropy loss. The forward pass, previously duplicated in
// Predict() and Train(), is factored into the private Forward() helper.
class NeuralNetwork {
public:
    // Initializes both weight matrices uniformly in
    // [-1/sqrt(fan_in), 1/sqrt(fan_in)] using rand(); biases start at
    // zero (vector::resize value-initializes doubles).
    NeuralNetwork(int num_inputs, int num_hidden, int num_outputs) :
        num_inputs_(num_inputs), num_hidden_(num_hidden), num_outputs_(num_outputs) {
        weights_ih_.resize(num_inputs_, vector<double>(num_hidden_));
        weights_ho_.resize(num_hidden_, vector<double>(num_outputs_));
        biases_h_.resize(num_hidden_);
        biases_o_.resize(num_outputs_);
        for (int i = 0; i < num_inputs_; ++i) {
            for (int j = 0; j < num_hidden_; ++j) {
                weights_ih_[i][j] = (static_cast<double>(rand()) / RAND_MAX - 0.5) * 2.0 / sqrt(num_inputs_);
            }
        }
        for (int i = 0; i < num_hidden_; ++i) {
            for (int j = 0; j < num_outputs_; ++j) {
                weights_ho_[i][j] = (static_cast<double>(rand()) / RAND_MAX - 0.5) * 2.0 / sqrt(num_hidden_);
            }
        }
    }

    // Returns the softmax class probabilities for one flattened image.
    vector<double> Predict(const vector<double>& inputs) {
        vector<double> hidden;
        return Forward(inputs, hidden);
    }

    // Runs `num_epochs` full passes over `images`, updating weights after
    // every sample (plain SGD), and logs the mean cross-entropy loss per
    // epoch. `inputs` must be the flattened 28x28 bitmap; the target is
    // the one-hot encoding of the label.
    void Train(const vector<Image>& images, int num_epochs, double learning_rate) {
        for (int epoch = 0; epoch < num_epochs; ++epoch) {
            double loss = 0.0;
            for (const auto& image : images) {
                // Flatten the 28x28 bitmap row-major into the input vector.
                vector<double> inputs(kImageSize * kImageSize);
                for (int r = 0; r < kImageSize; ++r) {
                    for (int c = 0; c < kImageSize; ++c) {
                        inputs[r * kImageSize + c] = image.pixels[r][c];
                    }
                }
                // One-hot target for the true digit.
                vector<double> targets(num_outputs_);
                targets[image.label] = 1.0;
                vector<double> hidden;
                vector<double> probabilities = Forward(inputs, hidden);
                // Cross-entropy contribution of this sample.
                for (int i = 0; i < num_outputs_; ++i) {
                    loss -= targets[i] * log(probabilities[i]);
                }
                // For softmax + cross-entropy, the gradient of the loss
                // w.r.t. each output logit is (probability - target); we
                // store the negation and ADD it below, which is the same
                // descent step.
                vector<double> output_errors(num_outputs_);
                for (int i = 0; i < num_outputs_; ++i) {
                    output_errors[i] = targets[i] - probabilities[i];
                }
                // Backpropagate through the hidden layer; sigmoid'(z) in
                // terms of the activation h is h * (1 - h).
                vector<double> hidden_errors(num_hidden_);
                for (int i = 0; i < num_hidden_; ++i) {
                    double error = 0.0;
                    for (int j = 0; j < num_outputs_; ++j) {
                        error += output_errors[j] * weights_ho_[i][j];
                    }
                    hidden_errors[i] = hidden[i] * (1.0 - hidden[i]) * error;
                }
                // Weight and bias updates (same order as the error terms).
                for (int i = 0; i < num_hidden_; ++i) {
                    for (int j = 0; j < num_outputs_; ++j) {
                        weights_ho_[i][j] += learning_rate * output_errors[j] * hidden[i];
                    }
                }
                for (int i = 0; i < num_inputs_; ++i) {
                    for (int j = 0; j < num_hidden_; ++j) {
                        weights_ih_[i][j] += learning_rate * hidden_errors[j] * inputs[i];
                    }
                }
                for (int i = 0; i < num_hidden_; ++i) {
                    biases_h_[i] += learning_rate * hidden_errors[i];
                }
                for (int i = 0; i < num_outputs_; ++i) {
                    biases_o_[i] += learning_rate * output_errors[i];
                }
            }
            // '\n' instead of endl: no need to flush every epoch.
            cout << "Epoch " << epoch + 1 << ", loss = " << loss / images.size() << '\n';
        }
    }

private:
    // One forward pass: fills `hidden` with the sigmoid activations of
    // the hidden layer and returns the softmax class probabilities.
    vector<double> Forward(const vector<double>& inputs, vector<double>& hidden) const {
        hidden.assign(num_hidden_, 0.0);
        for (int i = 0; i < num_hidden_; ++i) {
            double sum = biases_h_[i];
            for (int j = 0; j < num_inputs_; ++j) {
                sum += weights_ih_[j][i] * inputs[j];
            }
            hidden[i] = Sigmoid(sum);
        }
        vector<double> logits(num_outputs_);
        for (int i = 0; i < num_outputs_; ++i) {
            double sum = biases_o_[i];
            for (int j = 0; j < num_hidden_; ++j) {
                sum += weights_ho_[j][i] * hidden[j];
            }
            logits[i] = sum;
        }
        return Softmax(logits);
    }

    int num_inputs_;
    int num_hidden_;
    int num_outputs_;
    vector<vector<double>> weights_ih_;  // [input][hidden]
    vector<vector<double>> weights_ho_;  // [hidden][output]
    vector<double> biases_h_;
    vector<double> biases_o_;
};
int main() {
srand(time(nullptr));
vector<Image> train_images = ReadImages("train-images-idx3-ubyte", "train-labels-idx1-ubyte");
vector<Image> test_images = ReadImages("t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte");
NeuralNetwork nn(kImageSize * kImageSize, 128, 10);
nn.Train(train_images, 10, 0.1);
int num_correct = 0;
for (const auto& image : test_images) {
vector<double> inputs(kImageSize * kImageSize);
for (int i = 0; i < kImageSize; ++i) {
for (int j = 0; j < kImageSize; ++j) {
inputs[i * kImageSize + j] = image.pixels[i][j];
}
}
vector<double> probabilities = nn.Predict(inputs);
int predicted_label = distance(begin(probabilities), max_element(begin(probabilities), end(probabilities)));
if (predicted_label == image.label) {
++num_correct;
}
}
cout << "Accuracy = " << static_cast<double>(num_correct) / kNumTestImages << endl;
return 0;
}
```
该程序使用了一个简单的全连接神经网络来识别MNIST手写数字图像,每个28×28的图像都被展平成一个长度为784的一维向量,并输入到神经网络中进行训练和预测。训练过程使用交叉熵作为损失函数,并使用随机梯度下降算法进行优化。在测试过程中,使用测试集对模型进行评估,输出预测的准确率。
阅读全文