Python Code for the Naive Bayes Method
Date: 2024-10-12 09:17:12
The naive Bayes classifier is a simple but powerful probability-based machine learning algorithm. Its core idea is the "naive" assumption that the features are conditionally independent of one another given the class. In Python, naive Bayes is readily available in the scikit-learn library. Under the independence assumption, the classifier simply picks the class with the largest posterior probability:
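$$
\hat{y} = \arg\max_{c} \Big( P(c)\prod_{i=1}^{n} P(x_i \mid c) \Big)
$$

Here $P(c)$ is the class prior and $P(x_i \mid c)$ is the likelihood of feature $x_i$ given class $c$; the independence assumption is what lets the joint likelihood factor into this product. The example below uses the `GaussianNB` class from scikit-learn, which models each $P(x_i \mid c)$ as a Gaussian, to classify the three-class iris dataset: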
```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create and train the model
gnb = GaussianNB()
gnb.fit(X_train, y_train)
# Predict on the test set
y_pred = gnb.predict(X_test)
# Compute the accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
```
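If class probabilities are needed instead of hard labels, `GaussianNB` also provides a `predict_proba` method; a minimal sketch continuing from the example above:

```python
# Per-class posterior probabilities for the first few test samples
proba = gnb.predict_proba(X_test[:5])
print(proba.round(3))  # one row per sample, one column per iris class
```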
Related questions
A Python code example of naive Bayes
Below is a Python code example of naive Bayes for filtering abusive posts on a message board; it covers processing the sample data, training, and prediction:
```python
# -*- coding: utf-8 -*-
import numpy as np

# Filter abusive posts on a message board
def loadDataSet():
    postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
                   ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
                   ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],
                   ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
                   ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
                   ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
    classVec = [0, 1, 0, 1, 0, 1]  # 1 = abusive post, 0 = normal post
    return postingList, classVec

# Build the vocabulary list
def createVocabList(dataSet):
    vocabSet = set()  # start from an empty set so words are not duplicated
    for document in dataSet:
        vocabSet = vocabSet | set(document)  # union with the words in this document
    return list(vocabSet)

# Convert a document into a set-of-words vector
def setOfWords2Vec(vocabList, inputSet):
    returnVec = [0] * len(vocabList)  # vector of zeros, one slot per vocabulary word
    for word in inputSet:
        if word in vocabList:
            returnVec[vocabList.index(word)] = 1
        else:
            print("the word: %s is not in my Vocabulary!" % word)
    return returnVec

# Train the naive Bayes classifier
def trainNB0(trainMatrix, trainCategory):
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)  # prior probability of the abusive class
    p0Num = np.ones(numWords); p1Num = np.ones(numWords)  # word counts start at 1 (Laplace smoothing)
    p0Denom = 2.0; p1Denom = 2.0  # denominators start at 2 (Laplace smoothing)
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:  # accumulate counts for the abusive class: P(w0|1), P(w1|1), ...
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:  # accumulate counts for the normal class: P(w0|0), P(w1|0), ...
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    p1Vect = np.log(p1Num / p1Denom)  # take logs to avoid numerical underflow
    p0Vect = np.log(p0Num / p0Denom)
    return p0Vect, p1Vect, pAbusive

# Classify a word vector with the trained naive Bayes model
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    p1 = sum(vec2Classify * p1Vec) + np.log(pClass1)  # element-wise product, then sum of log-likelihoods
    p0 = sum(vec2Classify * p0Vec) + np.log(1.0 - pClass1)
    if p1 > p0:
        return 1
    else:
        return 0

# Test the naive Bayes classifier end to end
def testingNB():
    listOPosts, listClasses = loadDataSet()  # create the toy training posts
    myVocabList = createVocabList(listOPosts)  # build the vocabulary
    trainMat = []
    for postinDoc in listOPosts:
        trainMat.append(setOfWords2Vec(myVocabList, postinDoc))  # vectorize each training post
    p0V, p1V, pAb = trainNB0(np.array(trainMat), np.array(listClasses))  # train the classifier
    testEntry = ['love', 'my', 'dalmation']  # test sample 1
    thisDoc = np.array(setOfWords2Vec(myVocabList, testEntry))
    print(testEntry, 'classified as: ', classifyNB(thisDoc, p0V, p1V, pAb))
    testEntry = ['stupid', 'garbage']  # test sample 2
    thisDoc = np.array(setOfWords2Vec(myVocabList, testEntry))
    print(testEntry, 'classified as: ', classifyNB(thisDoc, p0V, p1V, pAb))

if __name__ == '__main__':
    testingNB()
```
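A similar set-of-words classifier can be built with scikit-learn. The sketch below is not part of the original example; it assumes the `loadDataSet()` function above is available and uses `BernoulliNB`, whose `alpha=1.0` plays the same role as the add-one smoothing hard-coded in `trainNB0`:

```python
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import BernoulliNB

# Re-join the token lists from loadDataSet() into plain strings for CountVectorizer
posts, labels = loadDataSet()
docs = [' '.join(post) for post in posts]

vectorizer = CountVectorizer(binary=True)   # binary=True gives 0/1 set-of-words features
X_train = vectorizer.fit_transform(docs)

clf = BernoulliNB(alpha=1.0)                # alpha=1.0 is add-one (Laplace) smoothing
clf.fit(X_train, labels)

test_docs = ['love my dalmation', 'stupid garbage']
print(clf.predict(vectorizer.transform(test_docs)))  # the second post should be labeled abusive (1)
```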
Hands-on Python code for the naive Bayes algorithm
Below is a from-scratch Python implementation of the (Gaussian) naive Bayes algorithm:
```python
import numpy as np
class NaiveBayes:
    """Gaussian naive Bayes classifier."""

    def __init__(self):
        self.classes = None
        self.priors = None
        self.means = None
        self.stdevs = None

    def fit(self, X, y):
        self.classes = np.unique(y)
        n_features = X.shape[1]
        n_classes = len(self.classes)
        self.priors = np.zeros(n_classes)
        self.means = np.zeros((n_classes, n_features))
        self.stdevs = np.zeros((n_classes, n_features))
        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.priors[i] = X_c.shape[0] / X.shape[0]
            self.means[i, :] = X_c.mean(axis=0)
            # a small variance-smoothing term keeps the standard deviation nonzero
            # for features that are constant within a class
            self.stdevs[i, :] = np.sqrt(X_c.var(axis=0) + 1e-3)

    def predict(self, X):
        y_pred = []
        for x in X:
            posteriors = []
            for i, c in enumerate(self.classes):
                prior = np.log(self.priors[i])
                likelihood = np.sum(np.log(self.pdf(x, self.means[i, :], self.stdevs[i, :])))
                posterior = prior + likelihood
                posteriors.append(posterior)
            y_pred.append(int(self.classes[np.argmax(posteriors)]))
        return y_pred

    def pdf(self, x, mean, stdev):
        # Gaussian probability density for each feature
        exponent = np.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
        return (1 / (np.sqrt(2 * np.pi) * stdev)) * exponent
```
In this implementation, the `NaiveBayes` class keeps four instance variables. The `fit` method computes each class's prior probability and per-feature mean and standard deviation (with a small smoothing term so a feature that is constant within a class does not yield a zero standard deviation). The `predict` method computes the log-posterior for each class and returns the class with the highest value, and the `pdf` method evaluates the Gaussian probability density function. Concretely, the score compared in `predict` is:
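$$
\log P(c \mid x) \;\propto\; \log P(c) + \sum_{i=1}^{n} \log \mathcal{N}\big(x_i \mid \mu_{c,i}, \sigma_{c,i}^2\big),
\qquad
\mathcal{N}(x \mid \mu, \sigma^2) = \frac{1}{\sqrt{2\pi}\,\sigma}\exp\!\Big(-\frac{(x-\mu)^2}{2\sigma^2}\Big)
$$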
Below is a simple example that uses the `NaiveBayes` class to predict whether a person likes movies, based on three binary features:
```python
# Toy training data: three binary features per person, label 1 = likes movies
X = np.array([[1, 1, 1], [1, 1, 0], [0, 0, 1], [0, 0, 0]])
y = np.array([1, 1, 0, 0])

nb = NaiveBayes()
nb.fit(X, y)

# Two new people to classify: the first resembles the "likes movies" group, the second does not
X_test = np.array([[1, 1, 0], [0, 0, 1]])
y_pred = nb.predict(X_test)
print(y_pred)
```
The output is:
```
[1, 0]
```
This means the first person is predicted to like movies and the second is not. Note that with binary features like these, several per-class standard deviations are zero, which is why `fit` adds a small smoothing term to the variance; for truly binary features, a Bernoulli-style model (like the word-vector classifier in the previous section) is usually the more natural choice.
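As a sanity check (an illustrative sketch, not part of the original post), scikit-learn's `GaussianNB` can be run on the same toy data; it handles the zero-variance features with its own internal variance smoothing:

```python
from sklearn.naive_bayes import GaussianNB
import numpy as np

X = np.array([[1, 1, 1], [1, 1, 0], [0, 0, 1], [0, 0, 0]])
y = np.array([1, 1, 0, 0])
X_test = np.array([[1, 1, 0], [0, 0, 1]])

# Should agree with the NaiveBayes class above: first person 1, second person 0
print(GaussianNB().fit(X, y).predict(X_test))
```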