朴素贝叶斯分类器代码c语言
时间: 2023-08-12 10:25:39 浏览: 92
以下是一个简单的朴素贝叶斯分类器的 C 语言代码示例:
```
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/* Capacity limits for the statically sized tables below. */
#define MAX_FEATURES 1000
#define MAX_SAMPLES 10000
/* Number of feature columns per sample, and number of training samples loaded so far. */
int num_features, num_samples;
/* feature_counts[i][c]: number of loaded samples of class c whose feature i was non-zero. */
int feature_counts[MAX_FEATURES][2];
/* feature_probs[i][c]: per-feature, per-class probability written by train() and read by classify(). */
double feature_probs[MAX_FEATURES][2];
/* Class label of each loaded sample, in load order. */
int sample_classes[MAX_SAMPLES];
/* Raw feature values of each loaded sample, in load order. */
int sample_features[MAX_SAMPLES][MAX_FEATURES];
void read_data(char *filename) {
FILE *fp = fopen(filename, "r");
if (!fp) {
perror("Error opening file");
exit(EXIT_FAILURE);
}
char line[1024];
while (fgets(line, 1024, fp)) {
char *token = strtok(line, ",");
int class = atoi(token);
sample_classes[num_samples] = class;
for (int i = 0; i < num_features; i++) {
token = strtok(NULL, ",");
int feature = atoi(token);
sample_features[num_samples][i] = feature;
if (feature) {
feature_counts[i][class]++;
}
}
num_samples++;
}
fclose(fp);
}
void train() {
for (int i = 0; i < num_features; i++) {
int count_0 = feature_counts[i][0];
int count_1 = feature_counts[i][1];
int total = count_0 + count_1;
feature_probs[i][0] = (double) count_0 / total;
feature_probs[i][1] = (double) count_1 / total;
}
}
/*
 * Score one sample against the trained model.
 *
 * features points to num_features integers; a non-zero entry means the
 * feature is present. Returns the normalised probability of class 0, i.e.
 * L0 / (L0 + L1) where Lc is the product of the per-feature likelihoods from
 * feature_probs for class c. No class prior term is applied.
 *
 * The normalisation is done in log space: exponentiating the two summed
 * log-likelihoods separately underflows to 0 once enough features are
 * involved, which would turn the ratio into 0/0.
 */
double classify(int *features) {
    double log_prob_0 = 0.0;
    double log_prob_1 = 0.0;
    for (int i = 0; i < num_features; i++) {
        double p0 = feature_probs[i][0];
        double p1 = feature_probs[i][1];
        /* A feature whose parameters are undefined carries no evidence. */
        if (isnan(p0) || isnan(p1)) {
            continue;
        }
        if (features[i]) {
            log_prob_0 += log(p0);
            log_prob_1 += log(p1);
        } else {
            log_prob_0 += log(1.0 - p0);
            log_prob_1 += log(1.0 - p1);
        }
    }

    /* Equal evidence, including the case where both classes are impossible (-inf). */
    if (log_prob_0 == log_prob_1) {
        return 0.5;
    }
    /* L0 / (L0 + L1) == 1 / (1 + exp(log L1 - log L0)); saturates cleanly to 0 or 1. */
    return 1.0 / (1.0 + exp(log_prob_1 - log_prob_0));
}
int main(int argc, char **argv) {
if (argc != 3) {
printf("Usage: %s <train_file> <test_file>\n", argv[0]);
exit(EXIT_FAILURE);
}
char *train_file = argv[1];
char *test_file = argv[2];
FILE *fp = fopen(test_file, "r");
if (!fp) {
perror("Error opening file");
exit(EXIT_FAILURE);
}
char line[1024];
while (fgets(line, 1024, fp)) {
char *token = strtok(line, ",");
int class = atoi(token);
int features[num_features];
for (int i = 0; i < num_features; i++) {
token = strtok(NULL, ",");
features[i] = atoi(token);
}
double prob_0 = classify(features);
int predicted_class = (prob_0 >= 0.5) ? 0 : 1;
printf("%d,%d\n", class, predicted_class);
}
fclose(fp);
return 0;
}
```
以上代码实现了一个简单的朴素贝叶斯分类器,包括读取数据、训练模型和分类等功能。其中,特征数和样本数的上限可以通过宏定义 `MAX_FEATURES` 和 `MAX_SAMPLES` 进行调整。主函数的命令行参数依次为训练数据文件和测试数据文件的文件名。训练和测试数据文件中的每一行表示一个样本,各字段以逗号分隔:第一个数字为其类别(0 或 1),后面的数字为其特征值(非零表示该特征出现)。程序将对每个测试样本进行分类,并以“真实类别,预测类别”的格式逐行输出。
阅读全文