用python写一个朴素贝叶斯分类器并使用实例测试
时间: 2024-05-10 22:15:55 浏览: 172
好的,我可以帮你写一个朴素贝叶斯分类器。下面是代码:
```python
import numpy as np
class NaiveBayes:
    """Gaussian naive Bayes classifier for continuous features.

    Each feature is modelled, independently per class, as a normal
    distribution whose mean and variance are estimated from the training
    data. Prediction picks the class with the largest log-posterior
    ``log P(c) + sum_j log N(x_j | mean_cj, var_cj)``.

    Attributes set by ``fit``:
        classes: sorted unique labels seen in ``y``.
        priors: per-class relative frequency, aligned with ``classes``.
        means: array of shape (n_classes, n_features).
        variances: array of shape (n_classes, n_features).
    """

    def __init__(self):
        self.classes = None
        self.priors = None
        self.means = None
        self.variances = None

    def fit(self, X, y):
        """Estimate class priors and per-class feature means/variances.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples class labels.

        Returns:
            The fitted estimator itself, so calls can be chained.

        Raises:
            ValueError: if ``X`` is not 2-D, is empty, or its number of rows
                does not match the length of ``y``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape != (n_samples,):
            raise ValueError("y must be a 1-D array with one label per row of X")

        # Priors are simply the relative frequency of each label.
        self.classes, counts = np.unique(y, return_counts=True)
        self.priors = counts / n_samples

        n_classes = len(self.classes)
        self.means = np.zeros((n_classes, n_features))
        self.variances = np.zeros((n_classes, n_features))
        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.means[i, :] = X_c.mean(axis=0)
            self.variances[i, :] = X_c.var(axis=0)
        return self

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            1-D array of n_samples labels drawn from ``self.classes``.

        Raises:
            AttributeError: if the model has not been fitted yet.
        """
        if self.classes is None:
            raise AttributeError("NaiveBayes instance is not fitted yet; call fit() first")
        X = np.asarray(X, dtype=float)

        # _pdf already yields log-densities, so they are summed directly;
        # taking np.log of them again would produce NaN for every negative
        # log-density and make argmax meaningless.
        log_posteriors = [
            np.log(prior) + np.sum(self._pdf(X, mean, variance), axis=1)
            for prior, mean, variance in zip(self.priors, self.means, self.variances)
        ]
        return self.classes[np.argmax(log_posteriors, axis=0)]

    def _pdf(self, X, mean, variance):
        """Return the element-wise Gaussian *log*-density of ``X``.

        Despite the historical name, the value is ``log N(x | mean, var)``,
        not the density itself; working in log space avoids underflow when
        many per-feature terms are combined.

        Args:
            X: array of shape (n_samples, n_features).
            mean: per-feature means, shape (n_features,).
            variance: per-feature variances, shape (n_features,).

        Returns:
            Array of shape (n_samples, n_features) of log-densities.
        """
        # The same smoothed variance is used in both the normalisation term
        # and the exponent so the result is a proper Gaussian log-density,
        # and zero-variance features do not divide by zero.
        eps = 1e-4
        smoothed = variance + eps
        return -0.5 * (np.log(2 * np.pi * smoothed) + (X - mean) ** 2 / smoothed)
```
这里我们使用了numpy库,它提供了高效的数组操作。该实现属于高斯朴素贝叶斯:假设每个特征在各个类别下服从正态分布,因此适用于连续型特征,可以用于二分类和多分类。
接下来我们用鸢尾花数据集来测试一下分类器:
```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the iris dataset and hold out 20% of the samples for evaluation
iris = load_iris()
features, labels = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit the classifier on the training portion
nb = NaiveBayes()
nb.fit(X_train, y_train)

# Predict the held-out samples and compare them with the true labels
y_pred = nb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
```
输出结果为:
```
Accuracy: 1.0
```
可以看到,在这次划分得到的测试集(30 个样本)上,分类器的准确率达到了100%;更换 random_state 或划分比例后,结果可能会有所不同。
阅读全文