贝叶斯分类器西瓜数据Python实现
时间: 2024-03-06 11:43:10 浏览: 97
以下示例使用 pandas 在西瓜数据集(watermelon.csv,需包含“编号”列以及取值为“是”/“否”的“好瓜”列)上实现朴素贝叶斯分类器:
```python
import pandas as pd
import numpy as np
# Load the dataset and drop the '编号' (serial number) column.
data = pd.read_csv('watermelon.csv').drop('编号', axis=1)

# Split the rows: an 80% random sample (fixed seed) for training,
# everything that was not sampled for testing.
train_data = data.sample(frac=0.8, random_state=0)
test_data = data.drop(train_data.index)

# Class priors: the share of training rows whose '好瓜' value is '是' / '否'.
p_good = int(train_data['好瓜'].eq('是').sum()) / len(train_data)
p_bad = int(train_data['好瓜'].eq('否').sum()) / len(train_data)
# Conditional probability of a feature value given the class.
def calc_conditional_prob(train_data, feature, value, label):
    """Estimate P(feature == value | class) from the training rows.

    Args:
        train_data: DataFrame with the feature columns plus the class column
            '好瓜', whose entries are '是' (good) or '否' (bad).
        feature: Name of the feature column to look at.
        value: Observed value of that feature in the sample being scored.
        label: The class to condition on. The aliases '好瓜' (good melon) and
            '坏瓜' (bad melon) are mapped to the class values '是' and '否';
            any other value is compared against the '好瓜' column as-is.

    Returns:
        The Laplace-smoothed relative frequency
        ``(matches + 1) / (rows in class + distinct values of feature)``,
        or 0.0 when there is nothing at all to estimate from.
    """
    class_column = '好瓜'
    # Callers name the class '好瓜' / '坏瓜', but the column stores '是' / '否'.
    class_value = {'好瓜': '是', '坏瓜': '否'}.get(label, label)

    class_rows = train_data[train_data[class_column] == class_value]
    matches = int((class_rows[feature] == value).sum())

    # Add-one smoothing: a value never seen with this class (or a class with
    # no rows) gets a small non-zero probability instead of wiping out the
    # whole product or raising ZeroDivisionError.
    n_values = int(train_data[feature].nunique())
    denominator = len(class_rows) + n_values
    if denominator == 0:
        return 0.0
    return (matches + 1) / denominator
# Prediction function.
def predict(test_data):
    """Label every row of ``test_data`` as '是' or '否'.

    For each row, the module-level priors ``p_good`` and ``p_bad`` are
    multiplied by ``calc_conditional_prob`` for every column of the
    module-level ``data`` except the last one, using ``train_data`` as the
    training set. The row is labelled '是' only when the good-class product
    is strictly larger; ties go to '否'.

    Args:
        test_data: DataFrame whose rows are the samples to classify.

    Returns:
        A list of '是' / '否' strings, one per row, in row order.
    """
    feature_names = data.columns[:-1]
    predictions = []
    for _, sample in test_data.iterrows():
        score_good, score_bad = p_good, p_bad
        for name in feature_names:
            observed = sample[name]
            score_good *= calc_conditional_prob(train_data, name, observed, '好瓜')
            score_bad *= calc_conditional_prob(train_data, name, observed, '坏瓜')
        predictions.append('是' if score_good > score_bad else '否')
    return predictions
# Evaluate the model: store the predictions next to the true labels and
# report the fraction of test rows where the two agree.
test_data['预测结果'] = predict(test_data)
accuracy = int((test_data['好瓜'] == test_data['预测结果']).sum()) / len(test_data)
print('准确率:', accuracy)
```
阅读全文