In PyCharm, using the IPython interpreter, implement a linear SVM classifier with the SMO algorithm and perform binary classification on the iris data set. Requirements: (1) select two features and two classes for binary classification (note: the binary labels are 1 and -1); (2) split the data into a training set and a test set; (3) normalize the data; (4) train the model (reference template: SVM_numpy_template.py); (5) output the optimal solution α of the SVM dual objective, the decision-function weights and intercept, the support vectors, etc.
Sure, let me answer your question.
First, you need the following libraries installed:
- numpy
- scikit-learn
Here is the implementation:
```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the data set: keep two features (sepal length, petal width)
# and the first two classes (setosa vs. versicolor) for binary classification
iris = load_iris()
X = iris.data[:100, [0, 3]]
y = np.where(iris.target[:100] == 0, -1, 1)  # labels must be -1 / +1

# Split into training and test sets (stratified so both classes appear in each;
# the naive split X[:100] / X[100:] would put only one class in the test set)
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42)

# Normalize: fit the scaler on the training set only to avoid test-set leakage
ss = StandardScaler()
train_X = ss.fit_transform(train_X)
test_X = ss.transform(test_X)

# SMO algorithm implementation
class SVM:
    def __init__(self, kernel='linear', C=1.0, tol=1e-3, max_iter=100):
        self.kernel = kernel
        self.C = C
        self.tol = tol
        self.max_iter = max_iter

    def fit(self, X, y):
        self.X = X
        self.y = y
        self.b = 0.0
        self.alpha = np.zeros(len(X))
        self.E = self._E(X, y, self.b, self.alpha)
        for _ in range(self.max_iter):
            for i in range(len(X)):
                # only update multipliers that violate the KKT conditions
                if not self._violates_KKT(self.E[i], y[i], self.alpha[i]):
                    continue
                j = self._select_j(i, self.E)
                alpha_i_old, alpha_j_old = self.alpha[i], self.alpha[j]
                # box constraints on alpha_j
                if y[i] != y[j]:
                    L = max(0.0, alpha_j_old - alpha_i_old)
                    H = min(self.C, self.C + alpha_j_old - alpha_i_old)
                else:
                    L = max(0.0, alpha_j_old + alpha_i_old - self.C)
                    H = min(self.C, alpha_j_old + alpha_i_old)
                if L == H:
                    continue
                eta = (self._kernel(X[i], X[i]) + self._kernel(X[j], X[j])
                       - 2 * self._kernel(X[i], X[j]))
                if eta <= 0:
                    continue
                # update alpha_j along the constraint, then clip to [L, H]
                self.alpha[j] += y[j] * (self.E[i] - self.E[j]) / eta
                self.alpha[j] = np.clip(self.alpha[j], L, H)
                # update alpha_i to keep the equality constraint sum(alpha*y) = 0
                self.alpha[i] += y[i] * y[j] * (alpha_j_old - self.alpha[j])
                self.b = self._b(X, y, self.alpha)
                self.E = self._E(X, y, self.b, self.alpha)
        self.w = self._w(X, y, self.alpha)

    def predict(self, X):
        return np.sign(np.dot(X, self.w) + self.b)

    def _kernel(self, x1, x2):
        if self.kernel == 'linear':
            return np.dot(x1, x2)
        elif self.kernel == 'rbf':
            gamma = 0.5
            return np.exp(-gamma * np.linalg.norm(x1 - x2) ** 2)

    def _w(self, X, y, alpha):
        # primal weight vector w = sum_i alpha_i * y_i * x_i (linear kernel)
        return np.dot(X.T, y * alpha)

    def _b(self, X, y, alpha):
        # average b over the free support vectors (0 < alpha < C);
        # fall back to all samples if there are none yet
        w = self._w(X, y, alpha)
        free = (alpha > 1e-6) & (alpha < self.C - 1e-6)
        if np.any(free):
            return np.mean(y[free] - np.dot(X[free], w))
        return np.mean(y - np.dot(X, w))

    def _select_j(self, i, E):
        # second-choice heuristic: pick j with the largest |E_i - E_j|
        j = int(np.argmax(np.abs(E - E[i])))
        if j == i:
            j = np.random.choice([k for k in range(len(self.X)) if k != i])
        return j

    def _violates_KKT(self, E, y, alpha):
        # alpha = 0      requires y*f(x) >= 1, i.e. y*E >= 0
        # 0 < alpha < C  requires y*f(x) == 1, i.e. y*E == 0
        # alpha = C      requires y*f(x) <= 1, i.e. y*E <= 0
        if alpha < 1e-6:
            return y * E < -self.tol
        elif alpha < self.C - 1e-6:
            return np.abs(y * E) > self.tol
        else:
            return y * E > self.tol

    def _E(self, X, y, b, alpha):
        # prediction error E_i = f(x_i) - y_i under the current alpha and b
        w = self._w(X, y, alpha)
        return np.dot(X, w) + b - y

svm = SVM(kernel='linear', C=1.0, tol=1e-3, max_iter=100)
svm.fit(train_X, train_y)

# Print the optimal dual variables, the decision-function weights, and the intercept
print("alpha:", svm.alpha)
print("w:", svm.w)
print("b:", svm.b)

# Accuracy on the training and test sets
train_acc = np.mean(svm.predict(train_X) == train_y)
print("train accuracy: {:.2f}%".format(train_acc * 100))
test_acc = np.mean(svm.predict(test_X) == test_y)
print("test accuracy: {:.2f}%".format(test_acc * 100))

# Support vectors are the samples with alpha > 0 (use a small tolerance
# instead of an exact float comparison)
support_vectors = svm.alpha > 1e-6
print("number of support vectors:", np.sum(support_vectors))
print("indices of support vectors:", np.where(support_vectors)[0])
```
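As a sanity check, you can compare the hand-rolled SMO result against scikit-learn's `SVC` with a linear kernel, which solves the same dual problem; the learned `w` and `b` should roughly match. This comparison is an optional extra, not part of the template:

```python
from sklearn.svm import SVC

# Reference solution from scikit-learn for comparison
clf = SVC(kernel='linear', C=1.0)
clf.fit(train_X, train_y)
print("sklearn w:", clf.coef_[0])        # decision-function weights
print("sklearn b:", clf.intercept_[0])   # intercept
print("sklearn support vector indices:", clf.support_)
```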
Running the code above prints the optimal solution α of the SVM dual objective, the decision-function weights and intercept, and the support vectors, along with the accuracies on the training and test sets.
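Since only two features are used, the decision boundary w·x + b = 0 is a straight line in the feature plane. Here is a minimal sketch of how you could visualize it, assuming matplotlib is installed and the second weight `svm.w[1]` is nonzero:

```python
import matplotlib.pyplot as plt

# Decision boundary: w[0]*x0 + w[1]*x1 + b = 0  =>  x1 = -(w[0]*x0 + b) / w[1]
x0 = np.linspace(train_X[:, 0].min(), train_X[:, 0].max(), 100)
x1 = -(svm.w[0] * x0 + svm.b) / svm.w[1]
plt.scatter(train_X[:, 0], train_X[:, 1], c=train_y, cmap='bwr', alpha=0.7)
plt.plot(x0, x1, 'k-', label='decision boundary')
# Circle the support vectors found by SMO
sv = svm.alpha > 1e-6
plt.scatter(train_X[sv, 0], train_X[sv, 1],
            s=120, facecolors='none', edgecolors='k', label='support vectors')
plt.xlabel('sepal length (standardized)')
plt.ylabel('petal width (standardized)')
plt.legend()
plt.show()
```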
Hope this helps!