Python实现adaboost算法
时间: 2023-06-10 14:03:06 浏览: 215
Adaboost算法Python实现---详细注释版
Adaboost算法是一种集成学习方法,可以用来提高分类器的准确率。下面是用Python实现Adaboost算法的简单示例代码:
```python
from numpy import *
import matplotlib.pyplot as plt
def loadSimpData():
    """Return a tiny 2-D toy dataset and its class labels (+1.0 / -1.0)."""
    # Five 2-D points; no single axis-aligned threshold separates the
    # classes perfectly, which is what makes boosting interesting here.
    features = [[1.0, 2.1],
                [2.0, 1.1],
                [1.3, 1.0],
                [1.0, 1.0],
                [2.0, 1.0]]
    labels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return matrix(features), labels
def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
    """Classify samples as +1/-1 by thresholding a single feature.

    dataMatrix -- matrix of samples (rows) by features (columns)
    dimen      -- index of the feature column to test
    threshVal  -- threshold value to compare against
    threshIneq -- 'lt': samples with value <= threshold get -1;
                  anything else: samples with value > threshold get -1

    Returns an m x 1 array of +1.0 / -1.0 predictions.
    """
    predictions = ones((shape(dataMatrix)[0], 1))
    column = dataMatrix[:, dimen]
    if threshIneq == 'lt':
        negMask = column <= threshVal
    else:
        negMask = column > threshVal
    predictions[negMask] = -1.0
    return predictions
def buildStump(dataArr, classLabels, D):
    """Find the decision stump with the lowest weighted error.

    Scans every feature, a grid of candidate thresholds per feature,
    and both inequality directions, keeping the best stump.

    dataArr     -- sample data (convertible to a numpy matrix, m x n)
    classLabels -- sequence of m labels in {+1.0, -1.0}
    D           -- m x 1 matrix of sample weights (should sum to 1)

    Returns (bestStump, minError, bestClassEst):
    bestStump    -- dict with 'dim', 'thresh', 'ineq' of the best stump
    minError     -- lowest weighted error, as a plain float
                    (fixed: the original returned a 1x1 matrix here)
    bestClassEst -- m x 1 predictions of the best stump
    """
    dataMatrix = mat(dataArr)
    labelMat = mat(classLabels).T
    m, n = shape(dataMatrix)
    numSteps = 10.0
    bestStump = {}
    bestClassEst = mat(zeros((m, 1)))
    minError = inf  # running best weighted error
    for i in range(n):
        rangeMin = dataMatrix[:, i].min()
        rangeMax = dataMatrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        # j = -1 and j = numSteps place thresholds just outside the data
        # range, so "all +1" / "all -1" stumps are also candidates.
        for j in range(-1, int(numSteps) + 1):
            for inequal in ['lt', 'gt']:
                threshVal = rangeMin + float(j) * stepSize
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                errArr = mat(ones((m, 1)))
                errArr[predictedVals == labelMat] = 0  # 0 where correct
                # Weighted error = total weight of misclassified samples.
                # D.T*errArr is a 1x1 matrix; extract the scalar so the
                # comparison and the returned value are plain floats.
                weightedError = float(D.T * errArr)
                if weightedError < minError:
                    minError = weightedError
                    bestClassEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClassEst
def adaBoostTrainDS(dataArr, classLabels, numIt=40):
    """Train an AdaBoost ensemble of decision stumps.

    dataArr     -- training samples
    classLabels -- labels in {+1.0, -1.0}
    numIt       -- maximum number of boosting rounds

    Returns (weakClassArr, aggClassEst): the list of weak classifier
    dicts (each carrying 'dim', 'thresh', 'ineq' and its vote weight
    'alpha') and the aggregated margins for the training samples.
    """
    weakClassArr = []
    m = shape(dataArr)[0]
    labelCol = mat(classLabels).T
    # Start from uniform sample weights.
    D = mat(ones((m, 1)) / m)
    aggClassEst = mat(zeros((m, 1)))
    for _ in range(numIt):
        stump, error, classEst = buildStump(dataArr, classLabels, D)
        # Vote weight of this stump; the 1e-16 floor avoids division by
        # zero when a stump classifies every sample correctly.
        alpha = float(0.5 * log((1.0 - error) / max(error, 1e-16)))
        stump['alpha'] = alpha
        weakClassArr.append(stump)
        # Re-weight samples: shrink weights where the stump was right,
        # grow them where it was wrong, then renormalize to sum 1.
        D = multiply(D, exp(multiply(-1 * alpha * labelCol, classEst)))
        D = D / D.sum()
        # Accumulate weighted votes; stop early once the ensemble
        # classifies the whole training set correctly.
        aggClassEst += alpha * classEst
        misclassified = multiply(sign(aggClassEst) != labelCol, ones((m, 1)))
        if misclassified.sum() / m == 0.0:
            break
    return weakClassArr, aggClassEst
def plotResult(dataArr, classLabels, classifier):
    """Scatter-plot the dataset and shade the ensemble's decision regions.

    dataArr     -- sample data (m x 2)
    classLabels -- labels in {+1.0, -1.0}
    classifier  -- list of weak classifier dicts from adaBoostTrainDS()
    """
    dataMat = array(dataArr)
    n = shape(dataMat)[0]
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):
        if classLabels[i] == 1.0:
            xcord1.append(dataMat[i, 0])
            ycord1.append(dataMat[i, 1])
        else:
            xcord2.append(dataMat[i, 0])
            ycord2.append(dataMat[i, 1])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(min(dataMat[:, 0]), max(dataMat[:, 0]), 0.1)
    y = arange(min(dataMat[:, 1]), max(dataMat[:, 1]), 0.1)
    X, Y = meshgrid(x, y)
    Z = zeros(shape(X))
    # Bug fix: meshgrid(x, y) returns arrays of shape (len(y), len(x)).
    # The original looped i over range(len(x)) and j over range(len(y))
    # while indexing X[i, j], which raises IndexError whenever the two
    # ranges differ in length (as they do for the demo data). Iterate
    # the grid's own shape instead.
    rows, cols = shape(X)
    for i in range(rows):
        for j in range(cols):
            Z[i, j] = classify([X[i, j], Y[i, j]], classifier)
    ax.contourf(X, Y, Z, alpha=0.2)
    plt.show()
def classify(data, classifiers):
    """Classify input data with an AdaBoost ensemble of stumps.

    data        -- a sample (or matrix of samples) to classify
    classifiers -- list of weak classifier dicts, each holding 'dim',
                   'thresh', 'ineq' and a vote weight 'alpha'

    Returns the sign (+1/-1) of the alpha-weighted sum of stump votes.
    """
    sample = mat(data)
    weightedVotes = 0.0
    for stump in classifiers:
        vote = stumpClassify(sample, stump['dim'], stump['thresh'], stump['ineq'])
        weightedVotes += stump['alpha'] * vote
    return sign(weightedVotes)
def testAdaboost():
    """Train AdaBoost on the toy dataset and plot the decision regions."""
    dataArr, classLabels = loadSimpData()
    classifiers, aggClassEst = adaBoostTrainDS(dataArr, classLabels, 30)
    plotResult(dataArr, classLabels, classifiers)


if __name__ == '__main__':
    testAdaboost()
```
以上就是一个简单的Adaboost算法的Python实现示例。注意,Adaboost算法需要通过多个弱分类器来构建一个强分类器,因此在代码中,我们用buildStump()函数来找到最佳阈值进行数据分类,然后用adaBoostTrainDS()函数来训练Adaboost分类器。最后,我们可以用plotResult()函数来绘制分类的结果。
阅读全文