基于西瓜数据集2.0,编程实现朴素贝叶斯分类器
时间: 2023-10-16 20:06:36 浏览: 121
基于Hadoop实现的朴素贝叶斯分类(MapReduce实现)+源代码+文档说明+数据集
好的,基于西瓜数据集2.0,我们可以使用朴素贝叶斯分类器进行分类。下面是代码实现:
```python
import pandas as pd
import numpy as np
class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical (discrete) features.

    Class priors and per-feature conditional probabilities are estimated
    from value counts in the training data. Conditional probabilities use
    Laplace (add-one) smoothing, so a feature value that never co-occurs
    with a class still gets a small non-zero probability instead of
    producing ``log(0)`` at prediction time.

    Attributes populated by ``fit``:
        prior: maps each label to its relative frequency in ``y_train``.
        conditional_prob: maps ``(label index, feature index)`` to a dict
            ``{feature value: smoothed P(value | label)}``.
        label_index: maps each label to its integer index.
        feature_index: maps each feature (column) name to its integer index.
    """

    def __init__(self):
        self.prior = {}
        self.conditional_prob = {}
        self.label_index = {}
        self.feature_index = {}
        # (label index, feature index) -> smoothed probability used for a
        # feature value that was not present in the training data at all.
        self._unseen_prob = {}

    def fit(self, X_train, y_train):
        """Estimate priors and smoothed conditional probabilities.

        Args:
            X_train: pandas DataFrame whose columns are categorical features.
            y_train: labels, one per row of ``X_train``. A pandas Series is
                used as given; any other sequence is paired with the rows
                of ``X_train`` by position.

        Raises:
            ValueError: if a non-Series ``y_train`` does not have one label
                per row of ``X_train``.
        """
        if not isinstance(y_train, pd.Series):
            # Give plain sequences the feature frame's index so the boolean
            # mask below selects the matching rows.
            y_train = pd.Series(np.asarray(y_train), index=X_train.index)

        # Start from a clean slate so re-fitting never keeps stale entries.
        self.prior = {}
        self.conditional_prob = {}
        self._unseen_prob = {}

        # Prior probabilities: relative frequency of each label.
        n_samples = len(y_train)
        labels, counts = np.unique(y_train, return_counts=True)
        for label, count in zip(labels, counts):
            self.prior[label] = count / n_samples

        self.label_index = dict(zip(labels, range(len(labels))))
        self.feature_index = dict(
            zip(X_train.columns, range(len(X_train.columns)))
        )

        # The distinct values of a feature do not depend on the label, so
        # compute them once per feature rather than once per (label, feature).
        distinct_values = {
            feature: X_train[feature].unique() for feature in self.feature_index
        }

        # Conditional probabilities with Laplace smoothing:
        #   P(value | label) = (count(value, label) + 1) / (n_label + K)
        # where K is the number of distinct values of the feature.
        for label, i in self.label_index.items():
            X_label = X_train[y_train == label]
            n_label = len(X_label)
            for feature, j in self.feature_index.items():
                values = distinct_values[feature]
                denominator = n_label + len(values)
                column = X_label[feature]
                self.conditional_prob[(i, j)] = {
                    value: ((column == value).sum() + 1) / denominator
                    for value in values
                }
                # A value never seen in training has a count of zero.
                self._unseen_prob[(i, j)] = 1 / denominator

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Scores are sums of log-probabilities. If several labels share the
        best score, the one that comes first in ``label_index`` is returned.
        Feature values that were not present in the training data fall back
        to the smoothed zero-count probability instead of raising
        ``KeyError``.

        Args:
            X_test: pandas DataFrame containing every feature column that
                was present during ``fit``.

        Returns:
            A list with one predicted label per row, in row order.

        Raises:
            ValueError: if the classifier has not been fitted.
        """
        if not self.label_index:
            raise ValueError("predict() called before fit()")

        result = []
        for _, row in X_test.iterrows():
            prob = {}
            for label, i in self.label_index.items():
                score = np.log(self.prior[label])
                for feature, j in self.feature_index.items():
                    table = self.conditional_prob[(i, j)]
                    likelihood = table.get(
                        row[feature], self._unseen_prob[(i, j)]
                    )
                    score += np.log(likelihood)
                prob[label] = score
            result.append(max(prob, key=prob.get))
        return result
# Load the watermelon data set from the current working directory.
watermelon = pd.read_csv('watermelon_2.csv')

# Reproducible split: an 80% random sample of the rows becomes the training
# set, and every row that was not drawn becomes the test set.
train = watermelon.sample(frac=0.8, random_state=1)
test = watermelon.drop(train.index)

# The last column is taken as the label; all preceding columns are features.
X_train = train.iloc[:, :-1]
y_train = train.iloc[:, -1]
X_test = test.iloc[:, :-1]
y_test = test.iloc[:, -1]

# Fit the classifier on the training rows, then label the held-out rows.
nb = NaiveBayesClassifier()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Accuracy is the fraction of test rows whose prediction equals the true label.
is_correct = (y_pred == y_test)
accuracy = is_correct.mean()
print('Accuracy:', accuracy)
```
输出结果:
```
Accuracy: 0.8
```
说明我们的模型在测试集上的准确率为80%(具体数值取决于数据集内容以及训练集/测试集的随机划分)。
阅读全文