粒子群算法特征选择python
时间: 2023-07-10 11:06:29 浏览: 102
粒子群算法(Particle Swarm Optimization,PSO)是一种基于群体智能的优化算法,也常被用于特征选择:每个粒子用一个 0/1 向量表示哪些特征被选中。以下是使用 Python 实现粒子群算法特征选择的示例代码:
```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Load the Iris dataset and separate the feature matrix from the labels.
iris = load_iris()
X = iris.data
y = iris.target
# Hold out 30% of the samples as a test set; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# Objective function: accuracy of a KNN classifier on the selected features.
def fitness_function(position, X_train, y_train, X_test, y_test):
    """Score a feature mask by the accuracy of a KNN classifier trained on it.

    Args:
        position: 1-D 0/1 mask (array or list); entries equal to 1 mark the
            feature columns that are kept.
        X_train: Training feature matrix, indexed as ``X_train[:, idx]``.
        y_train: Training labels.
        X_test: Evaluation feature matrix with the same columns as ``X_train``.
        y_test: Evaluation labels.

    Returns:
        The value of ``clf.score`` on the evaluation data restricted to the
        selected columns, or ``0.0`` when the mask selects no feature.
    """
    # np.asarray lets plain Python lists be used as masks as well.
    idx = np.where(np.asarray(position) == 1)[0]
    # A classifier cannot be fitted on zero columns; give such masks the
    # worst possible score instead of failing.
    if idx.size == 0:
        return 0.0

    X_train_selected = X_train[:, idx]
    X_test_selected = X_test[:, idx]

    # Train the classifier on the reduced feature set.
    clf = KNeighborsClassifier()
    clf.fit(X_train_selected, y_train)

    # Accuracy on the evaluation data is the fitness of this mask.
    accuracy = clf.score(X_test_selected, y_test)
    return accuracy
# Binary particle swarm optimiser used for feature selection.
class PSO:
    """Binary particle swarm optimiser over 0/1 feature masks.

    Each particle's position is a 0/1 vector of length ``n_features``; a 1
    means the corresponding feature is selected. Velocities are real-valued
    and are converted into per-bit probabilities through a sigmoid.

    Args:
        n_particles: Number of particles in the swarm.
        n_features: Length of each feature mask.
        max_iter: Number of evaluate-then-move iterations run by ``optimize``.
        w: Inertia weight applied to the previous velocity.
        c1: Cognitive coefficient (pull towards each particle's own best).
        c2: Social coefficient (pull towards the swarm's best).
        fitness_fn: Optional callable
            ``fitness_fn(position, X_train, y_train, X_test, y_test)``
            returning a score to maximise. When omitted, the module-level
            ``fitness_function`` is used.
    """

    def __init__(self, n_particles, n_features, max_iter, w, c1, c2,
                 fitness_fn=None):
        self.n_particles = n_particles  # swarm size
        self.n_features = n_features  # number of candidate features
        self.max_iter = max_iter  # iteration budget
        self.w = w  # inertia weight
        self.c1 = c1  # cognitive (self-learning) factor
        self.c2 = c2  # social learning factor
        self.fitness_fn = fitness_fn

        # Random 0/1 starting masks and zero starting velocities.
        self.positions = np.random.randint(0, 2, size=(n_particles, n_features))
        self.velocities = np.zeros((n_particles, n_features))

        # Best mask seen so far by each individual particle, with its score.
        self.personal_best_positions = self.positions.copy()
        self.personal_best_fitness = np.full(n_particles, -np.inf)

        # Best mask seen so far by the whole swarm, with its score.
        self.global_best_position = None
        self.global_best_fitness = -np.inf

    def optimize(self, X_train, y_train, X_test, y_test):
        """Run the swarm for ``max_iter`` iterations.

        The four data arguments are passed through unchanged to the fitness
        callable. Results are stored on ``global_best_position`` and
        ``global_best_fitness``; nothing is returned.
        """
        # Resolved here rather than in __init__ so the module-level default
        # only has to exist when no custom callable was supplied.
        if self.fitness_fn is not None:
            evaluate = self.fitness_fn
        else:
            evaluate = fitness_function

        for _ in range(self.max_iter):
            # Score every particle's current mask.
            fitness = np.array(
                [evaluate(p, X_train, y_train, X_test, y_test)
                 for p in self.positions],
                dtype=float,
            )

            # Update each particle's personal best.
            improved = fitness > self.personal_best_fitness
            self.personal_best_fitness[improved] = fitness[improved]
            self.personal_best_positions[improved] = self.positions[improved]

            # Update the swarm-wide best. The copy keeps the stored mask
            # independent of any later change to ``self.positions``.
            best = int(fitness.argmax())
            if (self.global_best_position is None
                    or fitness[best] > self.global_best_fitness):
                self.global_best_fitness = fitness[best]
                self.global_best_position = self.positions[best].copy()

            # Velocity update: inertia + pull towards the particle's own best
            # + pull towards the swarm best (broadcast over all particles).
            r1, r2 = np.random.rand(2, self.n_particles, self.n_features)
            cognitive = self.c1 * r1 * (self.personal_best_positions - self.positions)
            social = self.c2 * r2 * (self.global_best_position - self.positions)
            self.velocities = self.w * self.velocities + cognitive + social

            # Binary-PSO position update: the sigmoid of the velocity is the
            # probability that a bit is 1. Sampling (instead of rounding)
            # matters because a zero velocity gives probability 0.5, which
            # rounding would always turn into 0 and so clear every bit on
            # which a particle already agrees with the best mask.
            prob_one = 1 / (1 + np.exp(-self.velocities))
            draws = np.random.rand(self.n_particles, self.n_features)
            self.positions = (draws < prob_one).astype(int)

    def get_selected_features(self):
        """Return the indices of the features set to 1 in the best mask.

        Returns an empty index array if no best mask has been recorded yet.
        """
        if self.global_best_position is None:
            return np.empty(0, dtype=np.intp)
        return np.where(self.global_best_position == 1)[0]
# Hyperparameters (chosen empirically).
n_particles, max_iter = 20, 50
n_features = X_train.shape[1]  # one mask bit per feature column
w, c1, c2 = 0.7, 1.4, 1.4
# Build the swarm and run the optimisation.
pso = PSO(
    n_particles=n_particles,
    n_features=n_features,
    max_iter=max_iter,
    w=w,
    c1=c1,
    c2=c2,
)
pso.optimize(X_train, y_train, X_test, y_test)
# Report the indices of the selected features.
selected_features = pso.get_selected_features()
print('Selected features:', selected_features)
```
其中,fitness_function 是目标函数(在所选特征子集上训练 KNN 分类器,并返回其在测试集上的准确率),PSO 是粒子群类,optimize 方法用于运行粒子群算法,get_selected_features 方法用于获取被选中特征的下标。本例使用鸢尾花数据集,特征数为 4,分类器为 KNN,超参数根据经验设置,最终输出所选特征的下标。注意:示例直接以测试集准确率作为适应度,所选特征会偏向该测试集;实际应用中应改用验证集或交叉验证来计算适应度。
阅读全文