Python 使用朴素贝叶斯算法对数据进行分类的代码
时间: 2023-09-17 22:13:41 浏览: 98
以下是使用朴素贝叶斯算法对数据进行分类的Python代码示例:
```
import numpy as np
# Sample training data: 15 samples, two categorical features per sample.
# NumPy stores the mixed int/str rows as strings, so feature 0 holds
# '1', '2', '3' rather than integers.
_feature_0 = [1] * 5 + [2] * 5 + [3] * 5
_feature_1 = list('SMMSS' 'SMMLL' 'LMMLL')
x = np.array([[first, second] for first, second in zip(_feature_0, _feature_1)])
# Class label of each sample, in the same order as the rows of x.
y = np.array('No No Yes Yes No No No Yes Yes Yes Yes Yes Yes Yes No'.split())
# Build the per-feature value counts.
def create_dict(data):
    """Count how often each distinct value occurs in every column of ``data``.

    Args:
        data: Two-dimensional array-like that exposes ``shape`` and supports
            ``data[row][col]`` indexing (the script passes a NumPy array).

    Returns:
        A dict mapping each column index to ``{value: occurrence count}``.
        A column with no rows maps to an empty dict.
    """
    n_rows, n_cols = data.shape[0], data.shape[1]
    counts = {}
    for col in range(n_cols):
        column_counts = {}
        for row in range(n_rows):
            value = data[row][col]
            column_counts[value] = column_counts.get(value, 0) + 1
        counts[col] = column_counts
    return counts
# Compute the prior probabilities.
def calculate_prior_prob(y):
    """Estimate the prior probability of every class label in ``y``.

    Args:
        y: Sequence of hashable class labels, one per sample.

    Returns:
        A dict mapping each distinct label to its relative frequency in
        ``y``. An empty ``y`` yields an empty dict.
    """
    total = len(y)
    label_counts = {}
    for label in y:
        label_counts[label] = label_counts.get(label, 0) + 1
    return {label: count / total for label, count in label_counts.items()}
# Compute the conditional probabilities.
def calculate_cond_prob(x, y, dict):
    """Estimate ``P(feature j == value | class)`` from the training data.

    The result is indexed as ``cond_prob[class][value][j]``. No smoothing is
    applied, so a (class, value, feature) combination that never occurs in
    the data gets probability ``0.0``.

    Args:
        x: Two-dimensional array-like of samples exposing ``shape``; each row
            is one sample and each column one feature.
        y: Sequence of class labels, one per row of ``x``.
        dict: Per-feature value counts, mapping each column index to
            ``{value: count}``. Only the values are used: every value listed
            here gets an entry for every class and every feature index.
            (The parameter name shadows the builtin; it is kept so existing
            callers are unaffected.)

    Returns:
        Nested dict ``{class: {value: {feature index: probability}}}``.

    Raises:
        ValueError: If ``x`` and ``y`` describe different numbers of samples.
    """
    feature_counts = dict  # alias so the body does not lean on the shadowing name
    n_features = x.shape[1]
    if x.shape[0] != len(y):
        raise ValueError('x and y must contain the same number of samples')

    # Number of samples per class: the denominator of each conditional.
    class_counts = {}
    for label in y:
        class_counts[label] = class_counts.get(label, 0) + 1

    # Start every (class, value, feature) cell at zero so that prediction can
    # look up any known value under any class.
    cond_prob = {}
    for label in class_counts:
        cond_prob[label] = {
            value: {j: 0 for j in range(n_features)}
            for column in feature_counts.values()
            for value in column
        }

    # Count how often each value appears in each feature within each class.
    for row, label in zip(x, y):
        per_value = cond_prob[label]
        for j in range(n_features):
            cell = per_value.setdefault(
                row[j], {k: 0 for k in range(n_features)}
            )
            cell[j] += 1

    # Turn the counts into probabilities conditioned on the class.
    for label, per_value in cond_prob.items():
        n_in_class = class_counts[label]
        for per_feature in per_value.values():
            for j in per_feature:
                per_feature[j] /= n_in_class
    return cond_prob
# Naive Bayes classification.
def naive_bayes_predict(x, prior_prob, cond_prob):
    """Predict the most probable class for every row of ``x``.

    For each class the score is the prior multiplied by the conditional
    probability of every feature value of the sample; the class with the
    highest strictly positive score wins.

    Args:
        x: Two-dimensional array-like of samples exposing ``shape``.
        prior_prob: Dict mapping each class to its prior probability.
        cond_prob: Nested dict ``{class: {value: {feature index: prob}}}``.

    Returns:
        A list with one predicted class per row of ``x``. A sample for which
        no class reaches a positive score is predicted as the empty string.
    """
    n_samples, n_features = x.shape[0], x.shape[1]
    predictions = []
    for i in range(n_samples):
        sample = x[i]
        best_class, best_score = '', 0
        for label, prior in prior_prob.items():
            per_value = cond_prob[label]
            score = prior
            for j in range(n_features):
                # A value never seen in training has probability 0 instead of
                # raising KeyError.
                score *= per_value.get(sample[j], {}).get(j, 0)
            if score > best_score:
                best_class, best_score = label, score
        predictions.append(best_class)
    return predictions
# Fit the naive Bayes model on the sample data and classify the same samples.
# The value counts get their own name so the builtin `dict` is not rebound
# at module scope.
feature_value_counts = create_dict(x)
prior_prob = calculate_prior_prob(y)
cond_prob = calculate_cond_prob(x, y, feature_value_counts)
y_pred = naive_bayes_predict(x, prior_prob, cond_prob)
# Print the predicted class of every sample.
print('Predicted classes:', y_pred)
```
该代码使用朴素贝叶斯算法对样本数据进行分类，并输出分类结果。其中实现了创建字典（统计每个特征各取值出现的次数）、计算先验概率、计算条件概率和朴素贝叶斯分类等核心步骤。
阅读全文