给以上代码每行添加注释
时间: 2023-12-26 12:14:04 浏览: 62
以下是每行代码的注释:
```python
import numpy as np
# 生成单层决策树的函数
def buildStump(dataArr, classLabels, D):
    """Find the best decision stump (single-split tree) for weighted data.

    Scans every feature and a range of thresholds, trying both inequality
    directions, and keeps the stump with the lowest weighted classification
    error under the sample-weight vector ``D``.

    Args:
        dataArr: array-like of shape (m, n), the training features.
        classLabels: sequence of m class labels in {-1, +1}.
        D: (m, 1) np.matrix of sample weights (expected to sum to 1).

    Returns:
        bestStump: dict with keys 'dim' (feature index), 'thresh'
            (threshold value) and 'ineq' ('lt' or 'gt').
        minError: (1, 1) matrix holding the minimum weighted error.
        bestClasEst: (m, 1) matrix of predictions of the best stump.
    """
    # Convert data and labels to NumPy matrices (labels as a column vector).
    Matrix = np.mat(dataArr)
    label = np.mat(classLabels).T
    m, n = Matrix.shape
    # Number of threshold steps to try per feature, plus trackers for the
    # best stump found so far.
    Steps = 20.0
    bestStump = {}
    bestClasEst = np.mat(np.zeros(shape=(m, 1)))
    minError = np.inf
    # Iterate over every feature dimension.
    for i in range(n):
        # Threshold candidates span this feature's observed value range.
        rangeMin = Matrix[:, i].min()
        rangeMax = Matrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / Steps
        # j = -1 and j = Steps place thresholds just outside the data range,
        # so the "classify everything one way" stumps are also considered.
        for j in range(-1, int(Steps) + 1):
            for inequal in ['lt', 'gt']:  # try both inequality directions
                threshVal = (rangeMin + float(j) * stepSize)
                # Predictions for this (feature, threshold, direction) stump.
                # NOTE: stumpClassify is defined elsewhere in this file.
                predictedVals = stumpClassify(Matrix, i, threshVal, inequal)
                # Error vector: 1 where misclassified, 0 where correct.
                errArr = np.mat(np.ones(shape=(m, 1)))
                errArr[predictedVals == label] = 0
                # Weighted error rate = D^T · errArr (a 1x1 matrix).
                weightedError = D.T * errArr
                # Keep this stump if it beats the best one seen so far.
                if weightedError < minError:
                    minError = weightedError
                    bestClasEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClasEst
# 训练 AdaBoost 模型的函数
def adaBoostTrainDS(dataArr, classLabels, numIt=40):
    """Train an AdaBoost classifier using decision stumps as weak learners.

    On each iteration, fits the best stump on the current sample weights,
    computes its vote weight alpha, re-weights the samples to emphasize
    misclassified ones, and accumulates the ensemble prediction. Stops
    early if the training error reaches zero.

    Args:
        dataArr: array-like of shape (m, n), the training features.
        classLabels: sequence of m class labels in {-1, +1}.
        numIt: maximum number of boosting iterations (default 40).

    Returns:
        weakClassArr: list of stump dicts, each with keys 'dim', 'thresh',
            'ineq', and 'alpha' (the stump's vote weight).
        aggClassEst: (m, 1) matrix of the aggregated (pre-sign) margins.
    """
    dataArr = np.mat(dataArr)
    weakClassArr = []
    m = dataArr.shape[0]
    # Start with uniform sample weights and zero aggregate margins.
    D = np.mat(np.ones(shape=(m, 1)) / m)
    aggClassEst = np.mat(np.zeros(shape=(m, 1)))
    for i in range(numIt):
        # Fit the best stump under the current sample weights.
        bestStump, error, classEst = buildStump(dataArr, classLabels, D)
        # alpha = 0.5 * ln((1 - err) / err); the max() guards against
        # division by zero when the stump is perfect.
        alpha = float(0.5 * np.log((1 - error) / max(error, 1e-16)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        # Update sample weights: correctly classified samples get
        # exp(-alpha), misclassified get exp(+alpha); then renormalize.
        expon = np.multiply(-1 * alpha * np.mat(classLabels).T, classEst)
        D = np.multiply(D, np.exp(expon))
        D = D / D.sum()
        # Accumulate the ensemble margin and measure training error of the
        # sign of the aggregate prediction.
        aggClassEst += alpha * classEst
        aggErrors = np.multiply(np.sign(aggClassEst) != np.mat(classLabels).T, np.ones(shape=(m, 1)))
        errorRate = aggErrors.sum() / m
        print("total error: ", errorRate)
        # Stop early once every training sample is classified correctly.
        if errorRate == 0.0:
            break
    return weakClassArr, aggClassEst
```
阅读全文