用C++写MNIST分类系统
时间: 2023-06-20 14:09:11 浏览: 115
手写字符识别MNIST+VS2015+OpenCV.rar
MNIST是一个手写数字图片集,我们可以使用C++来编写一个基于卷积神经网络的MNIST分类系统。下面是一个简单的代码示例:
首先需要加载MNIST数据集,我们可以使用以下代码:
```c++
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// Loads an IDX3 image file (the MNIST image format) into memory.
//
// The file starts with four big-endian 32-bit integers (magic number 2051,
// image count, row count, column count) followed by one unsigned byte per
// pixel.
//
// @param filename    Path of the IDX3 file to read.
// @param num_images  Output: the image count declared in the header.
// @return One vector of n_rows * n_cols pixels per image, in file order, with
//         every pixel scaled from [0, 255] to [0.0, 1.0].
//
// Prints a message and terminates the process with exit(-1) when the file
// cannot be opened, the header is invalid, or the pixel data is truncated.
std::vector<std::vector<double>> load_mnist_images(std::string filename, int& num_images) {
    std::ifstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        std::cout << "Cannot open file: " << filename << std::endl;
        std::exit(-1);
    }

    // Decode byte by byte so the result does not depend on host endianness.
    const auto read_be32 = [&file]() -> int {
        unsigned char bytes[4] = {0, 0, 0, 0};
        file.read(reinterpret_cast<char*>(bytes), sizeof(bytes));
        const std::uint32_t value = (static_cast<std::uint32_t>(bytes[0]) << 24) |
                                    (static_cast<std::uint32_t>(bytes[1]) << 16) |
                                    (static_cast<std::uint32_t>(bytes[2]) << 8) |
                                    static_cast<std::uint32_t>(bytes[3]);
        return static_cast<int>(value);
    };

    constexpr int kImageMagic = 2051;  // 0x00000803: unsigned-byte data, 3 dimensions
    const int magic_number = read_be32();
    num_images = read_be32();
    const int n_rows = read_be32();
    const int n_cols = read_be32();
    if (!file || magic_number != kImageMagic || num_images < 0 || n_rows < 0 || n_cols < 0) {
        std::cout << "Invalid MNIST image file: " << filename << std::endl;
        std::exit(-1);
    }

    // Multiply in size_t so a large header cannot overflow int.
    const std::size_t pixels_per_image =
        static_cast<std::size_t>(n_rows) * static_cast<std::size_t>(n_cols);
    std::vector<std::vector<double>> images(static_cast<std::size_t>(num_images),
                                            std::vector<double>(pixels_per_image));
    std::vector<unsigned char> buffer(pixels_per_image);
    for (auto& image : images) {
        // One bulk read per image instead of one read per pixel.
        if (!buffer.empty()) {
            file.read(reinterpret_cast<char*>(buffer.data()),
                      static_cast<std::streamsize>(buffer.size()));
        }
        if (!file) {
            std::cout << "Unexpected end of file: " << filename << std::endl;
            std::exit(-1);
        }
        for (std::size_t j = 0; j < pixels_per_image; ++j) {
            image[j] = static_cast<double>(buffer[j]) / 255.0;
        }
    }
    return images;
}
// Loads an IDX1 label file (the MNIST label format) into memory.
//
// The file starts with two big-endian 32-bit integers (magic number 2049 and
// label count) followed by one unsigned byte per label.
//
// @param filename    Path of the IDX1 file to read.
// @param num_labels  Output: the label count declared in the header.
// @return The labels in file order, each widened from a byte to int.
//
// Prints a message and terminates the process with exit(-1) when the file
// cannot be opened, the header is invalid, or the label data is truncated.
std::vector<int> load_mnist_labels(std::string filename, int& num_labels) {
    std::ifstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        std::cout << "Cannot open file: " << filename << std::endl;
        std::exit(-1);
    }

    // Decode byte by byte so the result does not depend on host endianness.
    const auto read_be32 = [&file]() -> int {
        unsigned char bytes[4] = {0, 0, 0, 0};
        file.read(reinterpret_cast<char*>(bytes), sizeof(bytes));
        const std::uint32_t value = (static_cast<std::uint32_t>(bytes[0]) << 24) |
                                    (static_cast<std::uint32_t>(bytes[1]) << 16) |
                                    (static_cast<std::uint32_t>(bytes[2]) << 8) |
                                    static_cast<std::uint32_t>(bytes[3]);
        return static_cast<int>(value);
    };

    constexpr int kLabelMagic = 2049;  // 0x00000801: unsigned-byte data, 1 dimension
    const int magic_number = read_be32();
    num_labels = read_be32();
    if (!file || magic_number != kLabelMagic || num_labels < 0) {
        std::cout << "Invalid MNIST label file: " << filename << std::endl;
        std::exit(-1);
    }

    // Read every label with a single call, then widen bytes to int.
    std::vector<unsigned char> raw(static_cast<std::size_t>(num_labels));
    if (!raw.empty()) {
        file.read(reinterpret_cast<char*>(raw.data()),
                  static_cast<std::streamsize>(raw.size()));
    }
    if (!file) {
        std::cout << "Unexpected end of file: " << filename << std::endl;
        std::exit(-1);
    }
    return std::vector<int>(raw.begin(), raw.end());
}
```
接下来,我们需要实现卷积神经网络模型来对MNIST数据集进行分类。以下是一个简单的卷积神经网络模型示例:
```c++
#include <vector>
#include <cmath>
using namespace std;
// Logistic function: maps any real x into the range [0, 1].
double sigmoid(double x) {
    const double decay = std::exp(-x);
    return 1.0 / (1.0 + decay);
}
// Rectified linear unit: returns x when it is greater than zero, else 0.0.
double relu(double x) {
    return (0.0 < x) ? x : 0.0;
}
// 2-D convolution layer without padding; every output passes through sigmoid().
//
// Weights are indexed [out_channel][in_channel][kernel_row][kernel_col] and
// are filled from rand() at construction; biases start at zero.
class Conv2D {
public:
    // @param in_channels   Number of channels expected in the input tensor.
    // @param out_channels  Number of feature maps produced.
    // @param kernel_size   Side length of the square kernel.
    // @param stride        Step between neighbouring kernel positions.
    // @throws std::invalid_argument if any argument is not positive.
    Conv2D(int in_channels, int out_channels, int kernel_size, int stride) :
        in_channels_(require_positive(in_channels, "Conv2D: in_channels must be positive")),
        out_channels_(require_positive(out_channels, "Conv2D: out_channels must be positive")),
        kernel_size_(require_positive(kernel_size, "Conv2D: kernel_size must be positive")),
        stride_(require_positive(stride, "Conv2D: stride must be positive")),
        weights_(out_channels_,
                 std::vector<std::vector<std::vector<double>>>(
                     in_channels_,
                     std::vector<std::vector<double>>(kernel_size_,
                                                      std::vector<double>(kernel_size_)))),
        biases_(out_channels_, 0.0) {
        // The scale is the same for every weight, so compute it once.
        const double scale = std::sqrt(2.0 / (in_channels_ + out_channels_));
        for (auto& filter : weights_) {
            for (auto& plane : filter) {
                for (auto& kernel_row : plane) {
                    for (double& weight : kernel_row) {
                        weight = (static_cast<double>(std::rand()) / RAND_MAX - 0.5) * scale;
                    }
                }
            }
        }
    }

    // Applies the convolution to a [channel][row][col] tensor.
    //
    // All channels are assumed to have the same shape as the first one.
    // An input smaller than the kernel yields an output with zero rows/columns.
    //
    // @return A [out_channel][row][col] tensor of sigmoid activations.
    // @throws std::invalid_argument if the channel count differs from in_channels.
    std::vector<std::vector<std::vector<double>>> operator()(
            const std::vector<std::vector<std::vector<double>>>& input) {
        if (input.size() != static_cast<std::size_t>(in_channels_)) {
            throw std::invalid_argument("Conv2D: unexpected number of input channels");
        }
        const int in_height = static_cast<int>(input[0].size());
        const int in_width = in_height > 0 ? static_cast<int>(input[0][0].size()) : 0;
        const int out_height = output_extent(in_height);
        const int out_width = output_extent(in_width);

        std::vector<std::vector<std::vector<double>>> output(
            out_channels_,
            std::vector<std::vector<double>>(out_height, std::vector<double>(out_width)));
        for (int i = 0; i < out_channels_; ++i) {
            for (int j = 0; j < out_height; ++j) {
                for (int k = 0; k < out_width; ++k) {
                    double sum = 0.0;
                    for (int l = 0; l < in_channels_; ++l) {
                        for (int m = 0; m < kernel_size_; ++m) {
                            const int row = j * stride_ + m;
                            for (int n = 0; n < kernel_size_; ++n) {
                                const int col = k * stride_ + n;
                                sum += input[l][row][col] * weights_[i][l][m][n];
                            }
                        }
                    }
                    output[i][j][k] = sigmoid(sum + biases_[i]);
                }
            }
        }
        return output;
    }

private:
    // Validates a constructor argument before it is used to size a member.
    static int require_positive(int value, const char* message) {
        if (value <= 0) {
            throw std::invalid_argument(message);
        }
        return value;
    }

    // Number of kernel positions that fit along one axis of the given length.
    int output_extent(int in_extent) const {
        return in_extent < kernel_size_ ? 0 : (in_extent - kernel_size_) / stride_ + 1;
    }

    int in_channels_;
    int out_channels_;
    int kernel_size_;
    int stride_;
    std::vector<std::vector<std::vector<std::vector<double>>>> weights_;
    std::vector<double> biases_;
};
// Max-pooling layer: slides a square window over every channel independently
// and keeps the largest value seen in each window.
class MaxPool2D {
public:
    // @param kernel_size  Side length of the square pooling window.
    // @param stride       Step between neighbouring windows.
    // @throws std::invalid_argument if either argument is not positive.
    MaxPool2D(int kernel_size, int stride) :
        kernel_size_(require_positive(kernel_size, "MaxPool2D: kernel_size must be positive")),
        stride_(require_positive(stride, "MaxPool2D: stride must be positive")) {}

    // Pools a [channel][row][col] tensor.
    //
    // All channels are assumed to have the same shape as the first one.
    // An empty input yields an empty output, and an input smaller than the
    // window yields channels with zero rows/columns.
    //
    // @return A tensor with the same channel count and reduced height/width.
    std::vector<std::vector<std::vector<double>>> operator()(
            const std::vector<std::vector<std::vector<double>>>& input) {
        if (input.empty()) {
            return {};
        }
        const int in_height = static_cast<int>(input[0].size());
        const int in_width = in_height > 0 ? static_cast<int>(input[0][0].size()) : 0;
        const int out_height = output_extent(in_height);
        const int out_width = output_extent(in_width);

        std::vector<std::vector<std::vector<double>>> output(
            input.size(),
            std::vector<std::vector<double>>(out_height, std::vector<double>(out_width)));
        for (std::size_t i = 0; i < input.size(); ++i) {
            for (int j = 0; j < out_height; ++j) {
                for (int k = 0; k < out_width; ++k) {
                    double max_val = -INFINITY;
                    for (int l = 0; l < kernel_size_; ++l) {
                        const int row = j * stride_ + l;
                        for (int m = 0; m < kernel_size_; ++m) {
                            const int col = k * stride_ + m;
                            max_val = std::max(max_val, input[i][row][col]);
                        }
                    }
                    output[i][j][k] = max_val;
                }
            }
        }
        return output;
    }

private:
    // Validates a constructor argument before it is stored.
    static int require_positive(int value, const char* message) {
        if (value <= 0) {
            throw std::invalid_argument(message);
        }
        return value;
    }

    // Number of window positions that fit along one axis of the given length.
    int output_extent(int in_extent) const {
        return in_extent < kernel_size_ ? 0 : (in_extent - kernel_size_) / stride_ + 1;
    }

    int kernel_size_;
    int stride_;
};
// Collapses a [channel][row][col] tensor into one flat vector, visiting
// channels first, then rows, then the values within each row.
class Flatten {
public:
    std::vector<double> operator()(const std::vector<std::vector<std::vector<double>>>& input) {
        std::vector<double> flat;
        for (std::size_t c = 0; c < input.size(); ++c) {
            for (std::size_t r = 0; r < input[c].size(); ++r) {
                // Append the whole row at once, keeping its element order.
                flat.insert(flat.end(), input[c][r].begin(), input[c][r].end());
            }
        }
        return flat;
    }
};
// Fully connected layer; every output passes through relu().
//
// weights_[i][j] multiplies input j when computing output i. Weights are
// filled from rand() at construction; biases start at zero.
class Dense {
public:
    // @param in_features   Length of the input vector this layer accepts.
    // @param out_features  Length of the output vector it produces.
    Dense(int in_features, int out_features) :
        in_features_(in_features),
        out_features_(out_features),
        weights_(out_features, std::vector<double>(in_features)),
        biases_(out_features, 0.0) {
        // The scale is the same for every weight, so compute it once.
        const double scale = std::sqrt(2.0 / (in_features + out_features));
        for (auto& row : weights_) {
            for (double& weight : row) {
                weight = (static_cast<double>(std::rand()) / RAND_MAX - 0.5) * scale;
            }
        }
    }

    // Computes relu(W * input + b).
    //
    // @throws std::invalid_argument if input does not hold exactly in_features
    //         values; without this check a short input is read out of bounds.
    std::vector<double> operator()(const std::vector<double>& input) {
        if (input.size() != static_cast<std::size_t>(in_features_)) {
            throw std::invalid_argument("Dense: input size does not match in_features");
        }
        std::vector<double> output(out_features_);
        for (int i = 0; i < out_features_; ++i) {
            double sum = 0.0;
            for (int j = 0; j < in_features_; ++j) {
                sum += input[j] * weights_[i][j];
            }
            output[i] = relu(sum + biases_[i]);
        }
        return output;
    }

private:
    int in_features_;
    int out_features_;
    std::vector<std::vector<double>> weights_;
    std::vector<double> biases_;
};
// Turns a vector of scores into values that sum to one. The largest score is
// subtracted before exponentiating so that exp() never sees a positive argument.
class Softmax {
public:
    std::vector<double> operator()(const std::vector<double>& input) {
        double peak = -INFINITY;
        for (std::size_t i = 0; i < input.size(); ++i) {
            peak = std::max(peak, input[i]);
        }

        std::vector<double> probabilities;
        probabilities.reserve(input.size());
        double total = 0.0;
        for (const double score : input) {
            probabilities.push_back(std::exp(score - peak));
            total += probabilities.back();
        }

        for (std::size_t i = 0; i < probabilities.size(); ++i) {
            probabilities[i] /= total;
        }
        return probabilities;
    }
};
// Small convolutional classifier:
//   conv(5x5, 32) -> pool(2x2) -> conv(5x5, 64) -> pool(2x2)
//   -> flatten -> dense(256) -> dense(10) -> softmax
class Model {
public:
    // The first dense layer must accept exactly what Flatten produces. The
    // convolutions use no padding, so for a 28x28 input (the MNIST image size)
    // the spatial size shrinks 28 -> 24 -> 12 -> 8 -> 4, which leaves
    // 4 * 4 * 64 values after the second pooling layer.
    Model() :
        conv1_(1, 32, 5, 1),
        pool1_(2, 2),
        conv2_(32, 64, 5, 1),
        pool2_(2, 2),
        flatten_(),
        dense1_(4 * 4 * 64, 256),
        dense2_(256, 10),
        softmax_() {}

    // Runs a forward pass.
    //
    // @param input  A [channel][row][col] tensor; conv1_ is built for 1 channel.
    // @return The 10 values produced by the final softmax layer.
    std::vector<double> operator()(const std::vector<std::vector<std::vector<double>>>& input) {
        // The 3-D feature maps and the 1-D activations have different types,
        // so they cannot share a single variable.
        auto feature_maps = conv1_(input);
        feature_maps = pool1_(feature_maps);
        feature_maps = conv2_(feature_maps);
        feature_maps = pool2_(feature_maps);

        auto activations = flatten_(feature_maps);
        activations = dense1_(activations);
        activations = dense2_(activations);
        return softmax_(activations);
    }

private:
    Conv2D conv1_;
    MaxPool2D pool1_;
    Conv2D conv2_;
    MaxPool2D pool2_;
    Flatten flatten_;
    Dense dense1_;
    Dense dense2_;
    Softmax softmax_;
};
```
最后,我们可以使用以下代码来训练和测试模型(其中用到的损失函数以及模型的反向传播、权重更新接口在上文中并未给出,需要自行实现):
```c++
// Trains the model on the MNIST training set and prints the test-set accuracy
// after every epoch.
//
// NOTE(review): this function calls cross_entropy_loss,
// cross_entropy_loss_backward, Model::backward and Model::update_weights.
// None of them is defined in the code shown with this function; they have to
// be provided before it can compile.
int main() {
    // The loaders return each image as a flat row-major vector, while the model
    // takes a [channel][row][col] tensor. MNIST images are 28x28, single channel.
    constexpr int kImageRows = 28;
    constexpr int kImageCols = 28;
    const std::size_t pixels_per_image = static_cast<std::size_t>(kImageRows) * kImageCols;

    int num_images_train = 0;
    int num_labels_train = 0;
    auto images_train = load_mnist_images("train-images.idx3-ubyte", num_images_train);
    auto labels_train = load_mnist_labels("train-labels.idx1-ubyte", num_labels_train);
    int num_images_test = 0;
    int num_labels_test = 0;
    auto images_test = load_mnist_images("t10k-images.idx3-ubyte", num_images_test);
    auto labels_test = load_mnist_labels("t10k-labels.idx1-ubyte", num_labels_test);

    // Every image needs a label, otherwise the loops below index out of range.
    if (num_images_train != num_labels_train || num_images_test != num_labels_test) {
        std::cout << "Image and label counts do not match" << std::endl;
        return -1;
    }
    const auto has_expected_size = [&](const std::vector<std::vector<double>>& images) {
        return std::all_of(images.begin(), images.end(),
                           [&](const std::vector<double>& pixels) {
                               return pixels.size() == pixels_per_image;
                           });
    };
    if (!has_expected_size(images_train) || !has_expected_size(images_test)) {
        std::cout << "Images are not " << kImageRows << "x" << kImageCols << std::endl;
        return -1;
    }

    // Reshapes one flat image into the 1 x rows x cols tensor the model expects.
    const auto to_tensor = [&](const std::vector<double>& pixels) {
        std::vector<std::vector<std::vector<double>>> tensor(
            1, std::vector<std::vector<double>>(kImageRows, std::vector<double>(kImageCols)));
        for (int row = 0; row < kImageRows; ++row) {
            for (int col = 0; col < kImageCols; ++col) {
                tensor[0][row][col] = pixels[static_cast<std::size_t>(row) * kImageCols + col];
            }
        }
        return tensor;
    };

    Model model;
    int batch_size = 64;
    int num_batches = num_images_train / batch_size;
    double learning_rate = 0.001;
    int num_epochs = 10;
    for (int epoch = 1; epoch <= num_epochs; ++epoch) {
        double loss = 0.0;
        int num_samples = 0;
        for (int batch_idx = 0; batch_idx < num_batches; ++batch_idx) {
            const int start_idx = batch_idx * batch_size;
            const int end_idx = start_idx + batch_size;
            // Train on every sample of the batch, not only the first one, using
            // the same forward -> loss -> backward -> update sequence per sample.
            for (int i = start_idx; i < end_idx; ++i) {
                auto output = model(to_tensor(images_train[i]));
                loss += cross_entropy_loss(output, labels_train[i]);
                auto delta = cross_entropy_loss_backward(output, labels_train[i]);
                auto gradients = model.backward(delta);
                model.update_weights(gradients, learning_rate);
                ++num_samples;
            }
        }
        const double mean_loss = num_samples > 0 ? loss / num_samples : 0.0;
        std::cout << "Epoch " << epoch << ", Loss: " << mean_loss << std::endl;

        int correct = 0;
        for (int i = 0; i < num_images_test; ++i) {
            const auto output = model(to_tensor(images_test[i]));
            // Index of the largest output value is the predicted digit.
            const int prediction = static_cast<int>(
                std::distance(output.begin(), std::max_element(output.begin(), output.end())));
            if (prediction == labels_test[i]) {
                ++correct;
            }
        }
        // Avoid dividing by zero when the test set is empty.
        const double accuracy =
            num_images_test > 0 ? static_cast<double>(correct) / num_images_test : 0.0;
        std::cout << "Validation Accuracy: " << accuracy << std::endl;
    }
    return 0;
}
```
以上是一个简单的使用C++实现MNIST分类系统的示例,实际应用中可能需要更复杂的模型和训练技巧。
阅读全文