请给一份用西瓜数据集3.0实现朴素贝叶斯分类器的Python代码
时间: 2024-02-20 15:58:59 浏览: 93
好的,以下是使用西瓜数据集3.0实现朴素贝叶斯分类器的Python代码:
```python
import pandas as pd
import numpy as np
# Load the watermelon dataset 3.0 from a CSV file in the working directory.
watermelon_data = pd.read_csv(filepath_or_buffer='watermelon_3.csv')
# 定义朴素贝叶斯分类器
class NaiveBayes:
    """Naive Bayes classifier for continuous and discrete features.

    A feature column whose values can all be converted to ``float`` is
    modelled with one Gaussian per class.  Any other column (for example a
    column of strings) is treated as discrete and modelled with
    Laplace-smoothed category frequencies per class.

    Attributes (plain dicts, filled by :meth:`fit`):
        p_y:  class label -> prior probability.
        p_xy: (feature index, class label) -> likelihood parameters; a
              ``(mean, std)`` tuple for a continuous feature, or a dict
              ``{"probs": {value: probability}, "unseen": probability}``
              for a discrete feature.
    """

    # Floor applied to a fitted standard deviation.  A feature that is
    # constant within a class has std == 0, which would otherwise cause a
    # division by zero and NaN scores for every class.
    _MIN_STD = 1e-9

    def __init__(self):
        self.p_y = {}   # prior probability of each class
        self.p_xy = {}  # likelihood parameters per (feature, class)

    def fit(self, X, y):
        """Estimate class priors and per-feature likelihood parameters.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples class labels.

        Raises:
            ValueError: if X is not 2-D, is empty, or its number of rows
                differs from the number of labels.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if len(y) != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        # Start from a clean state so that refitting does not keep
        # parameters learned from an earlier dataset.
        self.p_y = {}
        self.p_xy = {}

        classes = np.unique(y)
        masks = {c: (y == c) for c in classes}

        # Prior probability of each class.
        for c in classes:
            self.p_y[c] = np.sum(masks[c]) / n_samples

        # Likelihood parameters of each feature under each class.
        for f in range(n_features):
            column = X[:, f]
            numeric = self._as_float(column)
            if numeric is None:
                # Number of distinct values of this feature over the whole
                # training set, used as the Laplace smoothing term.
                n_values = len(set(column.tolist()))
            for c in classes:
                if numeric is not None:
                    values = numeric[masks[c]]
                    std = max(values.std(), self._MIN_STD)
                    self.p_xy[(f, c)] = (values.mean(), std)
                else:
                    values = column[masks[c]].tolist()
                    counts = {}
                    for v in values:
                        counts[v] = counts.get(v, 0) + 1
                    denom = len(values) + n_values
                    self.p_xy[(f, c)] = {
                        "probs": {v: (k + 1) / denom
                                  for v, k in counts.items()},
                        # Probability given to any value that never
                        # occurred together with this class.
                        "unseen": 1 / denom,
                    }

    def predict(self, X):
        """Return the most probable class label for every row of X.

        Scores are accumulated as log-probabilities so that the product of
        many small likelihoods cannot underflow to zero.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            A numpy array holding one predicted label per row, using the
            same label values that were passed to :meth:`fit`.

        Raises:
            ValueError: if the classifier has not been fitted.
        """
        if not self.p_y:
            raise ValueError("fit must be called before predict")
        X = np.asarray(X)
        predictions = []
        for x in X:
            best_class = None
            best_score = -np.inf
            for c, prior in self.p_y.items():
                # Log of the (unnormalised) posterior probability.
                score = np.log(prior)
                for f, value in enumerate(x):
                    score += self._log_likelihood(value, self.p_xy[(f, c)])
                # Keep the class with the highest score seen so far.
                if best_class is None or score > best_score:
                    best_class, best_score = c, score
            predictions.append(best_class)
        return np.array(predictions)

    # Gaussian probability density function.
    def gaussian_pdf(self, x, mean, std):
        """Return the density at x of a normal distribution N(mean, std**2)."""
        exponent = np.exp(-((x - mean) ** 2 / (2 * std ** 2)))
        return (1 / (np.sqrt(2 * np.pi) * std)) * exponent

    def _log_likelihood(self, value, params):
        """Return log P(value | class) for one feature's parameters."""
        if isinstance(params, dict):
            # Discrete feature: smoothed frequency lookup.
            return np.log(params["probs"].get(value, params["unseen"]))
        # Continuous feature: log of the Gaussian density, written out
        # directly instead of taking log(gaussian_pdf(...)) so that values
        # far from the mean do not underflow to log(0).
        mean, std = params
        std = max(std, self._MIN_STD)
        diff = float(value) - mean
        return (-0.5 * np.log(2 * np.pi) - np.log(std)
                - diff ** 2 / (2 * std ** 2))

    @staticmethod
    def _as_float(column):
        """Return column as a float array, or None if it is not numeric."""
        try:
            return np.asarray(column, dtype=float)
        except (TypeError, ValueError):
            return None
# Separate features from labels: column 0 is skipped (presumably a sample
# ID - confirm against the CSV) and the last column is used as the label.
y = watermelon_data.iloc[:, -1].values
X = watermelon_data.iloc[:, 1:-1].values

# Shuffle the row indices and cut them 70% / 30% into train and test parts.
indices = np.random.permutation(len(X))
train_indices, test_indices = np.split(indices, [int(0.7 * len(X))])
X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y[train_indices], y[test_indices]

# Fit the classifier on the training part and predict the held-out part.
nb = NaiveBayes()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Show the predicted labels.
print(y_pred)
```
需要注意的是,高斯分布只适用于连续特征(如密度、含糖率),这里用它来估计连续特征在每个类别下的条件概率。西瓜数据集3.0还包含色泽、根蒂、敲声、纹理、脐部、触感等离散特征,对这些特征不能计算均值和标准差,而应按取值频率估计条件概率(即类别分布/多项式分布,二值特征可用伯努利分布),并配合拉普拉斯平滑以避免概率为零。
阅读全文