Python 中的朴素贝叶斯算法代码实现及注释
时间: 2023-10-03 09:08:27 浏览: 172
以下是一个简单的朴素贝叶斯算法的代码实现及注释:
```python
import numpy as np
class NaiveBayes:
    """Multinomial naive Bayes classifier for non-negative count features.

    Every class ``c`` is described by a prior ``P(c)`` and a multinomial
    distribution over the features ``P(j | c)``. A sample ``x`` is assigned
    to the class maximising ``log P(c) + sum_j x[j] * log P(j | c)``.

    Args:
        alpha: Additive (Laplace) smoothing added to every per-class feature
            count. Must be positive so that no feature ever receives a
            zero probability.

    Raises:
        ValueError: If ``alpha`` is not positive.
    """

    def __init__(self, alpha=1.0):
        if alpha <= 0:
            raise ValueError("alpha must be positive")
        self.alpha = alpha  # additive smoothing strength
        self.X = None  # training feature matrix
        self.y = None  # training label vector
        self.classes = None  # sorted unique class labels
        self.n_samples = None  # number of training samples
        self.n_features = None  # number of features
        self.class_probabilities = None  # P(c), one entry per class
        self.feature_probabilities = None  # P(j | c), one row per class

    def fit(self, X, y):
        """Estimate class priors and smoothed per-class feature probabilities.

        Args:
            X: Array-like of shape (n_samples, n_features) holding
                non-negative feature counts.
            y: Array-like of shape (n_samples,) holding the class labels.

        Raises:
            ValueError: If ``X`` is not 2-D, contains negative values, or its
                row count differs from the length of ``y``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        if np.any(X < 0):
            raise ValueError("X must not contain negative values")

        self.X = X
        self.y = y
        self.classes = np.unique(y)
        self.n_samples, self.n_features = X.shape
        self.class_probabilities = np.zeros(len(self.classes))
        self.feature_probabilities = np.zeros(
            (len(self.classes), self.n_features)
        )

        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.class_probabilities[i] = X_c.shape[0] / self.n_samples
            # Smoothing keeps every probability strictly positive and the
            # denominator non-zero even when a class has all-zero counts.
            counts = X_c.sum(axis=0) + self.alpha
            self.feature_probabilities[i, :] = counts / counts.sum()

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features) holding
                non-negative feature counts.

        Returns:
            A list with one predicted class label per input row.

        Raises:
            RuntimeError: If the model has not been fitted.
            ValueError: If ``X`` is not 2-D, has the wrong number of
                features, or contains negative values.
        """
        if self.class_probabilities is None:
            raise RuntimeError("fit must be called before predict")

        X = np.asarray(X, dtype=float)
        if X.shape[0] == 0:
            return []
        if X.ndim != 2 or X.shape[1] != self.n_features:
            raise ValueError("X must have shape (n_samples, n_features)")
        if np.any(X < 0):
            raise ValueError("X must not contain negative values")

        # Work in log space: a product of many small probabilities
        # underflows to 0.0, whereas a sum of logs stays finite.
        log_prior = np.log(self.class_probabilities)
        log_likelihood = X @ np.log(self.feature_probabilities).T
        scores = log_prior + log_likelihood
        return [self.classes[k] for k in np.argmax(scores, axis=1)]
```
注释如下:
```python
import numpy as np
class NaiveBayes:
    """Multinomial naive Bayes classifier for non-negative count features.

    Every class ``c`` is described by a prior ``P(c)`` and a multinomial
    distribution over the features ``P(j | c)``. A sample ``x`` is assigned
    to the class maximising ``log P(c) + sum_j x[j] * log P(j | c)``.

    Args:
        alpha: Additive (Laplace) smoothing added to every per-class feature
            count. Must be positive so that no feature ever receives a
            zero probability.

    Raises:
        ValueError: If ``alpha`` is not positive.
    """

    def __init__(self, alpha=1.0):
        if alpha <= 0:
            raise ValueError("alpha must be positive")
        self.alpha = alpha  # additive smoothing strength
        self.X = None  # training feature matrix
        self.y = None  # training label vector
        self.classes = None  # sorted unique class labels
        self.n_samples = None  # number of training samples
        self.n_features = None  # number of features
        self.class_probabilities = None  # P(c), one entry per class
        self.feature_probabilities = None  # P(j | c), one row per class

    def fit(self, X, y):
        """Estimate class priors and smoothed per-class feature probabilities.

        Args:
            X: Array-like of shape (n_samples, n_features) holding
                non-negative feature counts.
            y: Array-like of shape (n_samples,) holding the class labels.

        Raises:
            ValueError: If ``X`` is not 2-D, contains negative values, or its
                row count differs from the length of ``y``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        if np.any(X < 0):
            raise ValueError("X must not contain negative values")

        self.X = X
        self.y = y
        self.classes = np.unique(y)
        self.n_samples, self.n_features = X.shape
        self.class_probabilities = np.zeros(len(self.classes))
        self.feature_probabilities = np.zeros(
            (len(self.classes), self.n_features)
        )

        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.class_probabilities[i] = X_c.shape[0] / self.n_samples
            # Smoothing keeps every probability strictly positive and the
            # denominator non-zero even when a class has all-zero counts.
            counts = X_c.sum(axis=0) + self.alpha
            self.feature_probabilities[i, :] = counts / counts.sum()

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features) holding
                non-negative feature counts.

        Returns:
            A list with one predicted class label per input row.

        Raises:
            RuntimeError: If the model has not been fitted.
            ValueError: If ``X`` is not 2-D, has the wrong number of
                features, or contains negative values.
        """
        if self.class_probabilities is None:
            raise RuntimeError("fit must be called before predict")

        X = np.asarray(X, dtype=float)
        if X.shape[0] == 0:
            return []
        if X.ndim != 2 or X.shape[1] != self.n_features:
            raise ValueError("X must have shape (n_samples, n_features)")
        if np.any(X < 0):
            raise ValueError("X must not contain negative values")

        # Work in log space: a product of many small probabilities
        # underflows to 0.0, whereas a sum of logs stays finite.
        log_prior = np.log(self.class_probabilities)
        log_likelihood = X @ np.log(self.feature_probabilities).T
        scores = log_prior + log_likelihood
        return [self.classes[k] for k in np.argmax(scores, axis=1)]
```
以上代码实现了朴素贝叶斯算法的基本流程,包括模型训练和预测。其中,模型训练过程中计算了每个类别的概率以及每个特征在每个类别下的概率;预测过程中根据贝叶斯公式计算了每个类别下的概率,最终返回概率最大的类别作为预测结果。在计算特征概率时,为了避免出现特征值为 0 的情况,代码对其进行了修正。
阅读全文