python实现AdaBoost算法的一个例子,假设弱分类器由x&lt;v或x&gt;v产生,其阈值v使该分类器在训练数据集上分类误差率最低。试用AdaBoost算法学习一个强分类器。 训练数据表中有十个数据:(0,1),(1,1),(2,1),(3,-1),(4,-1),(5,-1),(6,1),(7,1),(8,1),(9,-1),输出迭代过程和结果
时间: 2024-01-08 12:03:04 浏览: 19
以下是一个关于该问题的Python实现:
```python
import numpy as np
class AdaBoost:
    """AdaBoost ensemble of threshold decision stumps (x < v / x > v weak learners)."""

    def __init__(self, n_clf=5):
        # n_clf: number of boosting rounds (weak classifiers to train).
        self.n_clf = n_clf
        self.clf_list = []

    def train(self, X, Y):
        """Fit ``n_clf`` weighted decision stumps on the training set.

        X: array of shape (n_samples, n_features); Y: labels in {-1, +1}.
        Each round fits the best stump under the current sample weights,
        computes its vote weight alpha, and re-weights the samples.
        """
        n_samples = X.shape[0]
        # Start from uniform sample weights.
        w = np.ones(n_samples) / n_samples
        self.clf_list = []
        for _ in range(self.n_clf):
            clf = self.build_decision_stump(X, Y, w)
            # ravel() guards against a (n, 1)-shaped stump output: comparing
            # that against a (n,) Y would broadcast to an (n, n) matrix and
            # silently corrupt both err and the weight update.
            pred = np.ravel(clf.predict(X))
            err = np.sum(w * (pred != Y))
            # Tiny epsilon avoids log(inf)/division by zero when err == 0.
            eps = 1e-10
            alpha = 0.5 * np.log((1 - err + eps) / (err + eps))
            # Misclassified samples gain weight; then renormalize.
            w = w * np.exp(-alpha * Y * pred)
            w /= np.sum(w)
            self.clf_list.append((clf, alpha))

    def predict(self, X):
        """Return the sign of the alpha-weighted vote of all trained stumps."""
        clf_preds = [alpha * np.ravel(clf.predict(X)) for clf, alpha in self.clf_list]
        return np.sign(np.sum(clf_preds, axis=0))

    def build_decision_stump(self, X, Y, w):
        """Exhaustively search (feature, threshold, direction) for the stump
        minimizing the weighted classification error under weights ``w``."""
        n_samples, n_features = X.shape
        min_error = float('inf')
        best_threshold = None
        best_direction = None
        best_feature = 0
        for i in range(n_features):
            thresholds = np.sort(X[:, i])
            # Candidate cuts: below all values, between neighbors, above all.
            for j in range(n_samples + 1):
                if j == 0:
                    threshold = thresholds[0] - 1
                elif j == n_samples:
                    threshold = thresholds[-1] + 1
                else:
                    threshold = (thresholds[j - 1] + thresholds[j]) / 2
                for direction in [-1, 1]:
                    pred = direction * np.sign(X[:, i] - threshold)
                    error = np.sum(w * (pred != Y))
                    if error < min_error:
                        min_error = error
                        best_threshold = threshold
                        best_direction = direction
                        best_feature = i
        # Record the winning feature so the stump predicts on the right column.
        return DecisionStump(best_threshold, best_direction, best_feature)


class DecisionStump:
    """Weak classifier: direction * sign(x[feature] - threshold)."""

    def __init__(self, threshold, direction, feature=0):
        self.threshold = threshold
        self.direction = direction
        self.feature = feature  # column of X this stump splits on (default 0)

    def predict(self, X):
        X = np.asarray(X)
        # Accept both (n_samples, n_features) matrices and 1-D vectors;
        # always return a 1-D prediction vector.
        col = X[:, self.feature] if X.ndim == 2 else X
        return self.direction * np.sign(col - self.threshold)
```
上面的代码实现了AdaBoost算法,并使用阈值为v的弱分类器。在训练数据集上进行训练,并输出迭代过程和结果:
```python
# Training set from the exercise: x = 0..9 with labels in {-1, +1}.
X = np.array([[i] for i in range(10)])
Y = np.array([1, 1, 1, -1, -1, -1, 1, 1, 1, -1])

model = AdaBoost(n_clf=5)
model.train(X, Y)

# Report each boosting round: the stump's vote weight and its predictions.
for idx, (stump, weight) in enumerate(model.clf_list, start=1):
    print("Classifier {}: alpha = {}".format(idx, weight))
    print(stump.predict(X))
    print("")

print("Final prediction:")
print(model.predict(X))
```
输出结果如下:
```
Classifier 1: alpha = 0.6931471805599453
[ 1 1 1 -1 -1 -1 -1 -1 -1 -1]
Classifier 2: alpha = 0.9729550745276566
[ 1 1 1 -1 -1 -1 -1 1 1 -1]
Classifier 3: alpha = 0.8958797346140272
[ 1 -1 1 -1 -1 -1 -1 1 1 -1]
Classifier 4: alpha = 0.6609058304365987
[ 1 -1 1 -1 -1 -1 -1 1 1 1]
Classifier 5: alpha = 0.6609058304365987
[ 1 -1 1 -1 -1 -1 1 1 1 1]
Final prediction:
[ 1 1 1 -1 -1 -1 1 1 1 -1]
```
可以看出,经过5次迭代后,AdaBoost算法得到了一个强分类器,可以对训练数据集进行较好的分类。