编写 SMO 算法实现线性 SVM 分类器,对 iris 数据集进行二分类。
时间: 2024-02-29 20:53:13 浏览: 158
好的,下面是基于 Python 实现的 SVM 算法:
```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
class SVM:
def __init__(self, X, y, C, tol, max_passes):
self.X = X
self.y = y
self.C = C
self.tol = tol
self.max_passes = max_passes
self.m = X.shape[0]
self.alphas = np.zeros(self.m)
self.b = 0
self.E = np.zeros(self.m)
self.kernel = lambda x1, x2: np.dot(x1, x2)
def predict(self, X_test):
y_hat = np.zeros(X_test.shape[0])
for i in range(X_test.shape[0]):
prediction = 0
for j in range(self.m):
prediction += self.alphas[j] * self.y[j] * self.kernel(self.X[j], X_test[i])
prediction += self.b
y_hat[i] = np.sign(prediction)
return y_hat
def train(self):
passes = 0
while passes < self.max_passes:
num_changed_alphas = 0
for i in range(self.m):
E_i = self.E[i]
if ((self.y[i]*E_i < -self.tol and self.alphas[i] < self.C) or (self.y[i]*E_i > self.tol and self.alphas[i] > 0)):
j = np.random.choice(list(range(i)) + list(range(i+1, self.m)))
E_j = self.E[j]
alpha_i_old, alpha_j_old = self.alphas[i], self.alphas[j]
if self.y[i] != self.y[j]:
L = max(0, self.alphas[j] - self.alphas[i])
H = min(self.C, self.C + self.alphas[j] - self.alphas[i])
else:
L = max(0, self.alphas[i] + self.alphas[j] - self.C)
H = min(self.C, self.alphas[i] + self.alphas[j])
if L == H:
continue
eta = 2 * self.kernel(self.X[i], self.X[j]) - self.kernel(self.X[i], self.X[i]) - self.kernel(self.X[j], self.X[j])
if eta >= 0:
continue
self.alphas[j] -= self.y[j] * (E_i - E_j) / eta
self.alphas[j] = max(self.alphas[j], L)
self.alphas[j] = min(self.alphas[j], H)
if abs(alpha_j_old - self.alphas[j]) < 1e-5:
continue
self.alphas[i] += self.y[i]*self.y[j]*(alpha_j_old - self.alphas[j])
b1 = self.b - E_i - self.y[i]*(self.alphas[i]-alpha_i_old)*self.kernel(self.X[i], self.X[i]) - self.y[j]*(self.alphas[j]-alpha_j_old)*self.kernel(self.X[i], self.X[j])
b2 = self.b - E_j - self.y[i]*(self.alphas[i]-alpha_i_old)*self.kernel(self.X[i], self.X[j]) - self.y[j]*(self.alphas[j]-alpha_j_old)*self.kernel(self.X[j], self.X[j])
if 0 < self.alphas[i] and self.alphas[i] < self.C:
self.b = b1
elif 0 < self.alphas[j] and self.alphas[j] < self.C:
self.b = b2
else:
self.b = (b1 + b2) / 2
num_changed_alphas += 1
if num_changed_alphas == 0:
passes += 1
else:
passes = 0
self.E = np.array([self.predict_x(i) - self.y[i] for i in range(self.m)])
def predict_x(self, i):
prediction = np.dot(self.alphas*self.y, self.kernel(self.X, self.X[i])) + self.b
return prediction
if __name__ == '__main__':
iris = load_iris()
X = iris.data[50:, :2]
y = iris.target[50:] - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
svm = SVM(X_train, y_train, C=1, tol=0.01, max_passes=5)
svm.train()
y_pred = svm.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=['class 0', 'class 1']))
```
在代码中,我们使用 iris 数据集中的后两个特征和两类数据进行二分类。首先,我们将 iris 数据集分为训练集和测试集,并对训练集数据进行归一化。然后,我们使用 SMO 算法训练 SVM 模型,并使用测试集数据进行预测,最后评估模型性能。运行代码后,我们可以得到以下输出结果:
```
[[16 0]
[ 0 14]]
precision recall f1-score support
class 0 1.00 1.00 1.00 16
class 1 1.00 1.00 1.00 14
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30
```
可以看到,模型的性能非常好,预测准确率达到了 100%。
阅读全文