带精英策略的遗传算法特征选择(Python 实现)
时间: 2023-08-26 10:05:19 浏览: 183
基于python实现的遗传算法实验源码+详细注释+项目说明+实验结果及总结.7z
以下是使用遗传算法进行特征选择的Python示例代码:
```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
# Build a synthetic binary-classification dataset
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=2,
    random_state=42,
)
# Split the dataset into a training part and a 20% test part
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
class FeatureSelectorGA():
    """Feature selection with a simple elitist genetic algorithm.

    A chromosome is a 0/1 vector of length ``n_features``; a 1 means the
    corresponding column of ``X`` is kept. Every generation the population
    is ranked by ``scoring_func``, the best ``int(crossover_proba *
    n_population)`` chromosomes are copied unchanged into the next
    generation (elitism) and the remaining slots are filled with offspring
    bred from those elites.

    Parameters
    ----------
    n_population : number of chromosomes per generation.
    n_generations : number of breeding rounds performed by ``fit``.
    crossover_proba : fraction of the ranked population that survives
        unchanged and acts as the parent pool.
    mutation_proba : probability that a new child gets one bit flipped.
    n_features : chromosome length (number of candidate columns).
    scoring_func : callable ``(X_subset, y) -> score``; higher is better.

    After ``fit`` the instance exposes ``scores_`` and ``population_``,
    both sorted from best to worst.
    """

    def __init__(self, n_population, n_generations, crossover_proba, mutation_proba, n_features, scoring_func):
        self.n_population = n_population
        self.n_generations = n_generations
        self.crossover_proba = crossover_proba
        self.mutation_proba = mutation_proba
        self.n_features = n_features
        self.scoring_func = scoring_func
        self.population = None

    def _initialize_population(self):
        """Return a random 0/1 matrix of shape (n_population, n_features)."""
        population_shape = (self.n_population, self.n_features)
        return np.random.randint(2, size=population_shape)

    def _compute_fitness(self, population, X, y):
        """Score every chromosome and return ``(scores, population)`` sorted best-first.

        A chromosome that selects no feature at all gets a score of 0.0
        without calling ``scoring_func``.
        """
        scores = []
        for chromosome in population:
            selected_features = np.where(chromosome == 1)[0]
            if len(selected_features) == 0:
                score = 0.0
            else:
                score = self.scoring_func(X[:, selected_features], y)
            scores.append(score)
        scores = np.array(scores)
        sorted_idx = np.argsort(scores)[::-1]
        return scores[sorted_idx], population[sorted_idx]

    def _crossover(self, parent_1, parent_2):
        """Single-point crossover; returns two new child arrays.

        The cut point is drawn from 1 .. len-1 so that each child receives
        at least one gene from each parent. Chromosomes shorter than two
        genes have no valid cut point and are returned as plain copies.
        """
        n_genes = len(parent_1)
        if n_genes < 2:
            return np.array(parent_1), np.array(parent_2)
        # randint's upper bound is exclusive, so this covers every interior
        # cut point and never asks for an empty range.
        crossover_point = np.random.randint(1, n_genes)
        child_1 = np.concatenate((parent_1[:crossover_point], parent_2[crossover_point:]))
        child_2 = np.concatenate((parent_2[:crossover_point], parent_1[crossover_point:]))
        return child_1, child_2

    def _mutate(self, chromosome):
        """Flip one randomly chosen bit of ``chromosome`` in place and return it."""
        mutation_point = np.random.randint(len(chromosome))
        chromosome[mutation_point] = 1 if chromosome[mutation_point] == 0 else 0
        return chromosome

    def _create_new_population(self, sorted_population):
        """Build the next generation from a best-first sorted population.

        The top ``n_parents`` chromosomes are carried over unchanged; the
        rest of the population is filled with (possibly mutated) children
        of randomly paired elites. Both children of each crossover are used.
        """
        new_population = np.zeros_like(sorted_population)
        n_parents = int(self.crossover_proba * self.n_population)
        # At least one parent is required to breed from, and there cannot
        # be more elites than population slots.
        n_parents = min(max(n_parents, 1), self.n_population)
        new_population[:n_parents] = sorted_population[:n_parents]

        next_slot = n_parents
        while next_slot < self.n_population:
            parent_1 = sorted_population[np.random.randint(n_parents)]
            parent_2 = sorted_population[np.random.randint(n_parents)]
            for child in self._crossover(parent_1, parent_2):
                if next_slot >= self.n_population:
                    break
                if np.random.rand() < self.mutation_proba:
                    child = self._mutate(child)
                new_population[next_slot] = child
                next_slot += 1
        return new_population

    def fit(self, X, y):
        """Run the genetic search on ``(X, y)`` and return ``self``.

        Prints the best score of every generation. On completion
        ``scores_`` and ``population_`` hold the final generation sorted
        best-first.
        """
        self.population = self._initialize_population()
        for i in range(self.n_generations):
            scores, sorted_population = self._compute_fitness(self.population, X, y)
            print(f'Generation {i} - Best score: {scores[0]}')
            self.population = self._create_new_population(sorted_population)
        self.scores_, self.population_ = self._compute_fitness(self.population, X, y)
        return self

    def transform(self, X):
        """Return the columns of ``X`` selected by the best chromosome.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet.
        """
        fitted_population = getattr(self, "population_", None)
        if fitted_population is None:
            raise AttributeError(
                "FeatureSelectorGA is not fitted yet; call fit() before transform()."
            )
        selected_features = np.where(fitted_population[0] == 1)[0]
        return X[:, selected_features]
def accuracy_scorer(X, y):
    """Score a feature subset by hold-out accuracy of logistic regression.

    Splits ``(X, y)`` 80/20 with a fixed seed, fits a default
    ``LogisticRegression`` on the larger part and returns its accuracy on
    the smaller part.
    """
    X_fit, X_eval, y_fit, y_eval = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    model = LogisticRegression().fit(X_fit, y_fit)
    return accuracy_score(y_eval, model.predict(X_eval))
# Genetic-algorithm hyper-parameters
n_population = 100
n_generations = 10
crossover_proba = 0.8
mutation_proba = 0.2
n_features = X.shape[1]
# Build the feature selector
selector = FeatureSelectorGA(n_population, n_generations, crossover_proba, mutation_proba, n_features, accuracy_scorer)
# Run the search on the training rows only. Fitting on the full X would let
# the scorer (which splits with the same seed as the final evaluation below)
# pick features by directly maximising accuracy on the held-out test rows.
selector.fit(X_train, y_train)
# Classify using only the selected features
X_selected = selector.transform(X)
# Same test_size and random_state as the initial split, so this reproduces
# the same train/test row partition, now restricted to the chosen columns.
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)
clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
score = accuracy_score(y_test, y_pred)
print(f'Selected {X_selected.shape[1]} features, accuracy score: {score}')
```
这个示例代码使用遗传算法进行特征选择,以 Logistic 回归模型作为评估器,并以准确率(accuracy)作为评分函数。可以通过调整种群大小、迭代代数、交叉概率和变异概率等参数来优化特征选择的效果。
阅读全文