周志华《机器学习》朴素贝叶斯公式的 C++ 代码实现
时间: 2023-10-27 18:22:46 浏览: 134
以下是朴素贝叶斯分类器的算法步骤:
1. 训练阶段:
(1)对于每个类别 i,计算先验概率P(Y=i)。
(2)对于每个特征 j,计算在每个类别 i 下该特征的条件概率P(Xj=x|Y=i)。
2. 测试阶段:
(1)对于输入样本 x,计算该样本属于每个类别 i 的后验概率 P(Y=i|X=x);在特征条件独立的假设下,它正比于 P(Y=i)·∏j P(Xj=xj|Y=i)。
(2)将样本分到概率最大的类别中。
下面是朴素贝叶斯分类器的 C++ 代码实现,其中假设特征和类别都是离散的:
```
#include <iostream>
#include <vector>
#include <map>
#include <cmath>
using namespace std;
// Estimates the prior probability P(Y = c) of every class as its relative
// frequency in `labels`.
//
// labels: class label of each training sample.
// Returns a map from class label to its share of the samples; the map is
// empty when `labels` is empty.
map<int, double> cal_prior_prob(vector<int>& labels) {
    map<int, double> frequency;
    // operator[] value-initializes a missing entry to 0.0 before the increment,
    // so no explicit "first time seen" branch is needed.
    for (int cls : labels) {
        frequency[cls] += 1.0;
    }
    // Turn the raw counts into fractions of the whole sample set.
    const int sample_count = labels.size();
    for (auto& entry : frequency) {
        entry.second /= sample_count;
    }
    return frequency;
}
// Estimates the class-conditional probabilities P(X_j = v | Y = c) by relative
// frequency.
//
// features: one row of discrete feature values per training sample.
// labels:   class label of each sample; labels[i] belongs to features[i].
// Returns a three-level table indexed as table[c][j][v]. Only (c, j, v)
// combinations that occur in the training data are stored, and no smoothing
// is applied, so a combination that never occurred has no entry.
//
// If the two inputs differ in length only their common prefix is used. Rows
// may differ in length; each row contributes the features it actually has.
map<int, map<int, map<int, double>>> cal_cond_prob(vector<vector<int>>& features, vector<int>& labels) {
    map<int, map<int, map<int, double>>> cond_prob;
    const size_t total = features.size() < labels.size() ? features.size() : labels.size();

    // Pass 1: count how often each value of feature j occurs within each class.
    for (size_t i = 0; i < total; ++i) {
        map<int, map<int, double>>& per_feature = cond_prob[labels[i]];
        const vector<int>& row = features[i];
        for (size_t j = 0; j < row.size(); ++j) {
            per_feature[static_cast<int>(j)][row[j]] += 1.0;
        }
    }

    // Pass 2: convert counts into probabilities. The denominator is the number
    // of observations of feature j inside the class (the sum of its value
    // counts), not the number of distinct values, so each distribution sums to 1.
    for (auto& class_entry : cond_prob) {
        for (auto& feature_entry : class_entry.second) {
            double observations = 0.0;
            for (const auto& value_entry : feature_entry.second) {
                observations += value_entry.second;
            }
            for (auto& value_entry : feature_entry.second) {
                value_entry.second /= observations;
            }
        }
    }
    return cond_prob;
}

// Read-only lookup of P(X_j = value | Y = label); yields 0.0 for any
// combination that is absent from the table.
static double lookup_cond_prob(const map<int, map<int, map<int, double>>>& cond_prob,
                               int label, int feature_index, int value) {
    const auto class_it = cond_prob.find(label);
    if (class_it == cond_prob.end()) {
        return 0.0;
    }
    const auto feature_it = class_it->second.find(feature_index);
    if (feature_it == class_it->second.end()) {
        return 0.0;
    }
    const auto value_it = feature_it->second.find(value);
    return value_it == feature_it->second.end() ? 0.0 : value_it->second;
}

// Predicts the class of one sample.
//
// feature:    discrete feature values of the sample, indexed by feature number.
// prior_prob: P(Y = c) for every candidate class c.
// cond_prob:  table[c][j][v] = P(X_j = v | Y = c); it is not modified.
// Returns the label with the highest score
//     log P(Y = c) + sum_j log P(X_j = feature[j] | Y = c),
// or -1 when prior_prob is empty. Ties, including the case where every class
// scores -infinity, go to the first class in ascending label order.
int predict(vector<int>& feature, map<int, double>& prior_prob, map<int, map<int, map<int, double>>>& cond_prob) {
    int label = -1;
    double max_prob = 0.0;
    // Log scores are never positive, so no fixed starting value is safe;
    // the first candidate is accepted unconditionally instead.
    bool has_candidate = false;
    for (const auto& prior : prior_prob) {
        const int cur_label = prior.first;
        double cur_prob = log(prior.second);
        for (size_t i = 0; i < feature.size(); ++i) {
            // A probability of 0 gives log(0) = -infinity, ruling the class out.
            cur_prob += log(lookup_cond_prob(cond_prob, cur_label, static_cast<int>(i), feature[i]));
        }
        if (!has_candidate || cur_prob > max_prob) {
            max_prob = cur_prob;
            label = cur_label;
            has_candidate = true;
        }
    }
    return label;
}

// Evaluates the classifier on a labelled set and prints the fraction of
// correctly predicted samples to standard output.
//
// If features and labels differ in length only their common prefix is
// evaluated. With no samples there is no accuracy to report, so a notice is
// printed instead of dividing by zero.
void test(vector<vector<int>>& features, vector<int>& labels, map<int, double>& prior_prob, map<int, map<int, map<int, double>>>& cond_prob) {
    const size_t total = features.size() < labels.size() ? features.size() : labels.size();
    if (total == 0) {
        cout << "Accuracy: N/A (no samples)" << '\n';
        return;
    }
    int correct = 0;
    for (size_t i = 0; i < total; ++i) {
        const int predict_label = predict(features[i], prior_prob, cond_prob);
        if (predict_label == labels[i]) {
            correct++;
        }
    }
    cout << "Accuracy: " << static_cast<double>(correct) / total << '\n';
}
// Entry point: trains the classifier on a data set and reports its accuracy
// on that same data.
int main() {
    // Load the data here: `features` holds one row of discrete feature values
    // per sample and `labels` holds the matching class of each sample.
    vector<vector<int>> features;
    vector<int> labels;

    // Training and evaluation are meaningless without data, so stop early
    // rather than handing empty containers to the training routines.
    if (features.empty() || labels.empty()) {
        cerr << "No training data loaded; nothing to train or evaluate." << '\n';
        return 1;
    }

    // Train. `auto` keeps these declarations in step with whatever table types
    // the training functions return.
    auto prior_prob = cal_prior_prob(labels);
    auto cond_prob = cal_cond_prob(features, labels);

    // Evaluate on the training data.
    test(features, labels, prior_prob, cond_prob);
    return 0;
}
```
注意:这里的代码只是朴素贝叶斯分类器的一种实现方式,可能还有其他实现方式。
阅读全文