绘制在简化版的SMO算法下生成的分类超平面,以及绘制在完整版的SMO算法下生成的分类超平面;数据集使用“testSet.txt”的python代码怎么写?
时间: 2024-03-18 16:39:26 浏览: 59
好的,我理解了您的问题。首先,让我来回答您的第一个问题:
在简化版的SMO算法下生成的分类超平面是一条直线,它的方程形式为:
w * x + b = 0
其中,w是法向量,x是特征向量,b是偏置项。在简化版的SMO算法中,我们只需要维护一个alpha向量,来存储每个样本点的拉格朗日乘子。每次迭代时,我们从alpha向量中选择两个分量(即两个拉格朗日乘子,第二个随机选取)进行优化,然后根据这两个乘子的变化更新偏置项b;法向量w可在训练结束后由 w = sum(alpha_i * y_i * x_i) 计算得到。
在完整版的SMO算法下,分类器的一般形式如下(使用线性核时,分类超平面仍然是一条直线;只有使用RBF等非线性核函数时,决策边界在原始特征空间中才是非线性的):
f(x) = sum(alpha_i * y_i * K(x_i, x)) + b
其中,K(x_i, x)是核函数,用来计算两个样本点之间的相似度。在完整版的SMO算法中,我们需要维护一个alpha向量和一个E向量(误差缓存),来存储每个样本点的拉格朗日乘子和预测值与真实值之间的误差。每次迭代时,我们借助误差缓存启发式地选择alpha向量中的两个分量进行优化(第二个分量取使|Ei - Ej|最大的样本),并更新E向量和偏置项b。最终的分类超平面只由支持向量(即alpha > 0的样本)决定。
接下来,让我来回答您的第二个问题:
要使用“testSet.txt”数据集,您可以采用以下Python代码:
```
import numpy as np
# 加载数据集
def loadDataSet(fileName):
    """Load a tab-separated two-feature data set.

    Each non-blank line of the file must contain three tab-separated
    numeric fields: ``x1``, ``x2`` and the class label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        ``[x1, x2]`` float pairs and ``labelMat`` is the list of float
        labels, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError / IndexError: If a non-blank line is malformed.
    """
    dataMat = []
    labelMat = []
    # The context manager guarantees the handle is closed even when a
    # malformed line raises half-way through the file.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            lineArr = stripped.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
# 核函数
def kernelTrans(X, A, kTup):
    """Evaluate the kernel between every row of ``X`` and the sample ``A``.

    Args:
        X: ``(m, n)`` data set (``np.matrix`` or any 2-D array-like).
        A: A single ``(1, n)`` sample (``np.matrix`` row or array-like).
        kTup: Kernel description. ``kTup[0]`` is ``'lin'`` for the linear
            kernel or ``'rbf'`` for the Gaussian kernel; for ``'rbf'``,
            ``kTup[1]`` is the width ``sigma`` and the kernel value is
            ``exp(-||x - A||^2 / sigma^2)``.

    Returns:
        An ``(m, 1)`` ``np.matrix`` holding ``K(X[j], A)`` for each row j.

    Raises:
        NameError: If ``kTup[0]`` is not a recognised kernel name.
    """
    # np.asmatrix is used instead of np.mat, which NumPy 2.0 removed; it
    # also lets callers pass plain arrays or lists.
    X = np.asmatrix(X)
    A = np.asmatrix(A)
    kernel = kTup[0]
    if kernel == 'lin':
        return X * A.T
    if kernel == 'rbf':
        # Row-wise squared Euclidean distance to A, computed in one shot
        # rather than with a Python loop over the rows.
        delta = X - A
        sqDist = np.sum(np.multiply(delta, delta), axis=1)
        return np.exp(sqDist / (-1 * kTup[1] ** 2))
    raise NameError('Kernel not recognized')
# SMO算法
class optStruct:
    """State and routines of the full (Platt) SMO algorithm for an SVM.

    The object stores the training data, the Lagrange multipliers
    ``alphas``, the bias ``b``, an error cache and the pre-computed kernel
    matrix ``K``; :meth:`smoP` runs the optimisation.
    """

    def __init__(self, dataMatIn, classLabels, C, toler, kTup):
        """Store the problem and pre-compute the kernel matrix.

        Args:
            dataMatIn: ``(m, n)`` training samples (matrix or array-like).
            classLabels: ``m`` labels (+1 / -1); a column matrix, a row
                matrix or a flat sequence are all accepted.
            C: Upper bound of every multiplier (soft-margin constant).
            toler: Tolerance used when checking the KKT conditions.
            kTup: Kernel description forwarded to ``kernelTrans``.

        Raises:
            ValueError: If the number of labels differs from the number
                of samples.
        """
        # np.asmatrix replaces np.mat (removed in NumPy 2.0) and makes
        # plain lists / arrays usable without conversion by the caller.
        self.X = np.asmatrix(dataMatIn)
        self.labelMat = np.asmatrix(np.asarray(classLabels).reshape(-1, 1))
        self.C = C
        self.tol = toler
        self.m = np.shape(self.X)[0]
        if self.labelMat.shape[0] != self.m:
            raise ValueError("number of labels does not match number of samples")
        self.alphas = np.asmatrix(np.zeros((self.m, 1)))
        self.b = 0
        # eCache[k] = [valid flag, cached error E_k]
        self.eCache = np.asmatrix(np.zeros((self.m, 2)))
        self.K = np.asmatrix(np.zeros((self.m, self.m)))
        for i in range(self.m):
            self.K[:, i] = kernelTrans(self.X, self.X[i, :], kTup)

    def calcEk(self, k):
        """Return the error ``f(x_k) - y_k`` of sample ``k`` as a float."""
        # self.b is either the initial 0 or a 1x1 matrix; adding it to the
        # 1x1 product and indexing [0, 0] yields a scalar in both cases and
        # avoids the deprecated float(<1x1 matrix>) conversion.
        fXk = float((np.multiply(self.alphas, self.labelMat).T * self.K[:, k] + self.b)[0, 0])
        return fXk - float(self.labelMat[k, 0])

    def selectJrand(self, i):
        """Return a random sample index in ``[0, m)`` different from ``i``.

        Raises:
            ValueError: If fewer than two samples exist (no other index).
        """
        if self.m < 2:
            raise ValueError("need at least two samples to pick a second index")
        j = i
        while j == i:
            j = int(np.random.uniform(0, self.m))
        return j

    def selectJ(self, i, Ei):
        """Pick the second multiplier index ``j`` for the pair ``(i, j)``.

        Among samples with a valid cached error, the one maximising
        ``|Ei - Ek|`` is chosen; when no cached sample gives a positive
        step size, a random index is used instead.

        Returns:
            ``(j, Ej)``: the chosen index and its current error.
        """
        maxK = -1
        maxDeltaE = 0
        Ej = 0
        self.eCache[i] = [1, Ei]
        validEcacheList = np.nonzero(self.eCache[:, 0].A)[0]
        if len(validEcacheList) > 1:
            for k in validEcacheList:
                if k == i:
                    continue
                Ek = self.calcEk(k)
                deltaE = abs(Ei - Ek)
                if deltaE > maxDeltaE:
                    maxK = k
                    maxDeltaE = deltaE
                    Ej = Ek
            if maxK >= 0:
                return maxK, Ej
            # Every cached error equals Ei: returning maxK == -1 would
            # silently index the last sample, so fall through to the
            # random choice below.
        j = self.selectJrand(i)
        Ej = self.calcEk(j)
        return j, Ej

    def updateEk(self, k):
        """Recompute the error of sample ``k`` and mark it valid in the cache."""
        Ek = self.calcEk(k)
        self.eCache[k] = [1, Ek]

    def innerL(self, i):
        """Try to optimise the multiplier pair whose first member is ``i``.

        Returns:
            1 if a pair of multipliers was changed, otherwise 0.
        """
        Ei = self.calcEk(i)
        yi = self.labelMat[i, 0]
        alphaIold = self.alphas[i, 0]
        # Only samples violating the KKT conditions by more than tol are
        # worth optimising.
        violatesKKT = ((yi * Ei < -self.tol) and (alphaIold < self.C)) or \
                      ((yi * Ei > self.tol) and (alphaIold > 0))
        if not violatesKKT:
            return 0

        j, Ej = self.selectJ(i, Ei)
        yj = self.labelMat[j, 0]
        alphaJold = self.alphas[j, 0]

        # Box bounds for alpha_j that keep both multipliers inside [0, C]
        # while preserving sum(alpha * y).
        if yi != yj:
            L = max(0, alphaJold - alphaIold)
            H = min(self.C, self.C + alphaJold - alphaIold)
        else:
            L = max(0, alphaJold + alphaIold - self.C)
            H = min(self.C, alphaJold + alphaIold)
        if L == H:
            print("L==H")
            return 0

        eta = 2.0 * self.K[i, j] - self.K[i, i] - self.K[j, j]
        if eta >= 0:
            print("eta>=0")
            return 0

        alphaJnew = self.clipAlpha(alphaJold - yj * (Ei - Ej) / eta, H, L)
        self.alphas[j, 0] = alphaJnew
        self.updateEk(j)
        if abs(alphaJnew - alphaJold) < 0.00001:
            print("j not moving enough")
            return 0

        # Move alpha_i by the same amount in the opposite direction.
        alphaInew = alphaIold + yj * yi * (alphaJold - alphaJnew)
        self.alphas[i, 0] = alphaInew
        self.updateEk(i)

        bOld = float(np.sum(self.b))  # works for the initial 0 and for a 1x1 matrix
        dI = alphaInew - alphaIold
        dJ = alphaJnew - alphaJold
        b1 = bOld - Ei - yi * dI * self.K[i, i] - yj * dJ * self.K[i, j]
        b2 = bOld - Ej - yi * dI * self.K[i, j] - yj * dJ * self.K[j, j]
        if 0 < alphaInew < self.C:
            bNew = b1
        elif 0 < alphaJnew < self.C:
            bNew = b2
        else:
            bNew = (b1 + b2) / 2.0
        # Kept as a 1x1 matrix so the value returned by smoP has the same
        # shape as before.
        self.b = np.asmatrix([[bNew]])
        return 1

    def clipAlpha(self, aj, H, L):
        """Clamp ``aj`` into the closed interval ``[L, H]``."""
        if aj > H:
            aj = H
        if L > aj:
            aj = L
        return aj

    def smoP(self, maxIter, kTup=None):
        """Run the full SMO outer loop.

        Alternates between passes over the whole data set and passes over
        the non-bound multipliers (0 < alpha < C) until a full pass changes
        nothing or ``maxIter`` passes were made.

        Args:
            maxIter: Maximum number of passes.
            kTup: Unused (the kernel matrix is built in ``__init__``);
                accepted for call compatibility.

        Returns:
            ``(b, alphas)``: the bias (0 if nothing was updated, otherwise
            a 1x1 matrix) and the ``(m, 1)`` matrix of multipliers.
        """
        iteration = 0
        entireSet = True
        alphaPairsChanged = 0
        while (iteration < maxIter) and ((alphaPairsChanged > 0) or entireSet):
            alphaPairsChanged = 0
            if entireSet:
                for i in range(self.m):
                    alphaPairsChanged += self.innerL(i)
                    print("fullSet, iter: %d i:%d, pairs changed %d" % (iteration, i, alphaPairsChanged))
                iteration += 1
            else:
                nonBoundIs = np.nonzero((self.alphas.A > 0) * (self.alphas.A < self.C))[0]
                for i in nonBoundIs:
                    alphaPairsChanged += self.innerL(i)
                    print("non-bound, iter: %d i:%d, pairs changed %d" % (iteration, i, alphaPairsChanged))
                iteration += 1
            # After a full pass switch to non-bound passes; return to a
            # full pass once a non-bound pass makes no progress.
            if entireSet:
                entireSet = False
            elif alphaPairsChanged == 0:
                entireSet = True
            print("iteration number: %d" % iteration)
        return self.b, self.alphas
# 绘制分类超平面
def plotSVM(dataMat, labelMat, alphas, b):
    """Plot the samples, the linear separating line and the support vectors.

    The separating line is ``w . x + b = 0`` with
    ``w = sum_i alphas[i] * labelMat[i] * dataMat[i]``, so the drawn line is
    only meaningful for a model trained with the linear kernel.

    Args:
        dataMat: ``(m, 2)`` samples (list of pairs, array or matrix).
        labelMat: ``m`` class labels; values > 0 form the positive class.
        alphas: ``m`` Lagrange multipliers (flat sequence or ``(m, 1)``
            matrix).
        b: Bias term, either a scalar or a 1x1 matrix.

    Raises:
        ValueError: If no samples are given.
    """
    import matplotlib.pyplot as plt

    points = np.asarray(dataMat, dtype=float).reshape(-1, 2)
    labels = np.asarray(labelMat, dtype=float).reshape(-1)
    alphaVec = np.asarray(alphas, dtype=float).reshape(-1)
    bias = float(np.sum(b))  # accepts both a scalar and a 1x1 matrix
    if points.shape[0] == 0:
        raise ValueError("dataMat is empty")

    # Samples: one scatter call per class so each gets its own colour.
    positive = labels > 0
    plt.scatter(points[positive, 0], points[positive, 1])
    plt.scatter(points[~positive, 0], points[~positive, 1])

    # Separating line from the trained multipliers and bias.
    w = np.dot(alphaVec * labels, points)
    x1 = points[:, 0].max()
    x2 = points[:, 0].min()
    if abs(w[1]) > 1e-12:
        y1 = (-bias - w[0] * x1) / w[1]
        y2 = (-bias - w[0] * x2) / w[1]
        plt.plot([x1, x2], [y1, y2])
    elif abs(w[0]) > 1e-12:
        # w has no second component: the boundary is the vertical line
        # x = -b / w0.
        x0 = -bias / w[0]
        plt.plot([x0, x0], [points[:, 1].min(), points[:, 1].max()])
    # If w is (numerically) zero there is no line to draw.

    # Support vectors: samples whose multiplier is non-zero.
    for i, alpha in enumerate(alphaVec):
        if abs(alpha) > 0:
            x, y = points[i]
            plt.scatter([x], [y], s=150, linewidth=1, facecolors='none', edgecolors='red')
    plt.show()
# 测试
if __name__ == "__main__":
    dataArr, labelArr = loadDataSet('testSet.txt')
    # smoP is a method of optStruct, not a free function: build the solver
    # from a sample matrix and a column matrix of labels, then run it.
    # The linear kernel is used because plotSVM draws a straight line.
    kTup = ('lin', 0)
    solver = optStruct(np.asmatrix(dataArr), np.asmatrix(labelArr).T, 0.6, 0.001, kTup)
    b, alphas = solver.smoP(40, kTup)
    plotSVM(dataArr, labelArr, alphas, b)
```
这段代码包含了加载数据集、核函数、SMO算法、绘制分类超平面等部分。您只需要将代码中的“testSet.txt”替换成您自己的数据集文件名即可。
希望这些回答能够对您有所帮助!
阅读全文