朴素贝叶斯算法c++实现
时间: 2023-07-26 22:33:44 浏览: 53
以下是朴素贝叶斯算法的C++实现示例:
```c++
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>
using namespace std;
// Gaussian naive Bayes classifier: assumes each feature is independently
// normally distributed within each class. Labels are integers 0..classSize-1.
class NaiveBayes {
private:
    std::vector<std::vector<double>> featureSum;   // per-class running sum of each feature (means derived from it)
    int featureSize;                               // number of features per sample
    int classSize;                                 // number of classes
    std::map<int, int> classCnt;                   // sample count per class
    std::map<int, std::vector<double>> classMean;  // Gaussian mean per class per feature
    std::map<int, std::vector<double>> classVar;   // Gaussian variance per class per feature
    std::map<int, double> classPrior;              // prior P(class) estimated from label frequency
public:
    // featureSize: dimensionality of each sample; classSize: number of distinct labels.
    NaiveBayes(int featureSize, int classSize) {
        this->featureSize = featureSize;
        this->classSize = classSize;
        featureSum.assign(classSize, std::vector<double>(featureSize, 0.0));
    }
    // Estimate per-class priors, feature means, and feature variances from (X, y).
    // X: n samples of featureSize features each; y: n labels in [0, classSize).
    // Samples with an out-of-range label are skipped with a warning (the original
    // indexed featureSum[c] unchecked, which was out-of-bounds UB).
    void fit(std::vector<std::vector<double>>& X, std::vector<int>& y) {
        int n = (int)X.size();
        if (n == 0 || n != (int)y.size()) {
            std::cout << "Error: size of X and y not match!\n";
            return;
        }
        // Reset all learned state so fit() can safely be called more than once.
        featureSum.assign(classSize, std::vector<double>(featureSize, 0.0));
        classCnt.clear();
        classMean.clear();
        classVar.clear();
        classPrior.clear();
        // First pass: per-class counts and feature sums.
        for (int i = 0; i < n; i++) {
            int c = y[i];
            if (c < 0 || c >= classSize) {
                std::cout << "Warning: label out of range, sample skipped\n";
                continue;
            }
            classCnt[c]++;
            for (int j = 0; j < featureSize; j++) {
                featureSum[c][j] += X[i][j];
            }
        }
        // Priors and means. A class with no samples keeps prior 0, mean 0,
        // variance 0 and is never selected by predict() — this avoids the
        // division by zero the original performed when classCnt[c] == 0.
        for (int c = 0; c < classSize; c++) {
            classPrior[c] = (double)classCnt[c] / n;
            classMean[c].assign(featureSize, 0.0);
            classVar[c].assign(featureSize, 0.0);
            if (classCnt[c] == 0) continue;
            for (int j = 0; j < featureSize; j++) {
                classMean[c][j] = featureSum[c][j] / classCnt[c];
            }
        }
        // Second pass: per-class feature variances (population variance).
        for (int i = 0; i < n; i++) {
            int c = y[i];
            if (c < 0 || c >= classSize) continue;
            for (int j = 0; j < featureSize; j++) {
                double d = X[i][j] - classMean[c][j];
                classVar[c][j] += d * d;
            }
        }
        for (int c = 0; c < classSize; c++) {
            if (classCnt[c] == 0) continue;  // avoid division by zero
            for (int j = 0; j < featureSize; j++) {
                classVar[c][j] /= classCnt[c];
            }
        }
    }
    // Return the label with the largest Gaussian posterior for sample x.
    // Works in log space so many small densities cannot underflow to 0 (the
    // original multiplied raw densities, which loses all precision for even
    // modest feature counts), and adds a small epsilon to each variance so a
    // constant feature does not cause division by zero.
    int predict(std::vector<double>& x) {
        const double kVarEps = 1e-9;  // variance smoothing term
        const double kLog2Pi = std::log(2.0 * 3.14159265358979323846);  // avoid non-standard M_PI
        int bestClass = 0;
        double bestLogP = -HUGE_VAL;
        for (int c = 0; c < classSize; c++) {
            if (classPrior.find(c) == classPrior.end() || classPrior[c] <= 0.0) {
                continue;  // class unseen during training — posterior is zero
            }
            double logP = std::log(classPrior[c]);
            for (int j = 0; j < featureSize; j++) {
                double v = classVar[c][j] + kVarEps;
                double d = x[j] - classMean[c][j];
                // log of the Gaussian density N(x[j]; mean, v)
                logP += -0.5 * (kLog2Pi + std::log(v)) - d * d / (2.0 * v);
            }
            if (logP > bestLogP) {
                bestLogP = logP;
                bestClass = c;
            }
        }
        return bestClass;
    }
};
// Load "data.txt" (one sample per line: whitespace-separated features, last
// value is the integer class label), train a Gaussian naive Bayes model, and
// classify one hard-coded query sample.
int main() {
    std::vector<std::vector<double>> X;
    std::vector<int> y;
    std::ifstream fin("data.txt");
    if (!fin) {
        std::cerr << "Error: cannot open data.txt\n";
        return 1;
    }
    std::string line;
    while (std::getline(fin, line)) {
        std::istringstream ss(line);
        std::vector<double> vec;
        double val;
        while (ss >> val) {
            vec.push_back(val);
        }
        // Need at least one feature plus a label; the original called
        // vec.back() unchecked, which is UB on a blank line.
        if (vec.size() < 2) {
            continue;
        }
        y.push_back((int)vec.back());
        vec.pop_back();  // keep only the features in X; the label goes to y
        X.push_back(vec);
    }
    // The original dereferenced X[0] and max_element on possibly-empty data.
    if (X.empty()) {
        std::cerr << "Error: no usable samples in data.txt\n";
        return 1;
    }
    // Train: labels are assumed to be 0..max(y).
    int featureSize = (int)X[0].size();
    int classSize = *std::max_element(y.begin(), y.end()) + 1;
    NaiveBayes clf(featureSize, classSize);
    clf.fit(X, y);
    // Query sample; pad with zeros (or truncate) so its length always matches
    // the feature count found in the data instead of assuming exactly 4.
    std::vector<double> x = {5.1, 3.5, 1.4, 0.2};
    x.resize(featureSize, 0.0);
    int pred = clf.predict(x);
    std::cout << "Predicted class: " << pred << std::endl;
    return 0;
}
```
在该实现中,我们首先定义了一个 NaiveBayes 类,里面包含了模型的训练和预测方法。在训练方法 fit 中,我们统计了每个类别的样本数量、特征均值和特征方差,以及每个类别的先验概率。在预测方法 predict 中,我们计算了每个类别的后验概率,并选择后验概率最大的类别作为预测结果。
该实现中的数据集格式为每行一个样本,以空格分隔各个特征,最后一个特征为标签。如果你想尝试该代码,请先将数据集保存为 data.txt 文件。