nsga2算法代码+数据 python
NSGA-II(Non-dominated Sorting Genetic Algorithm II,非支配排序遗传算法 II)是一种用于多目标优化问题的遗传算法。它通过非支配排序将种群划分为若干层(前沿),并利用拥挤度距离在同一层内保持解的多样性和分布均匀性。在 Python 中实现 NSGA-II 时,通常会借助 DEAP 等遗传算法库。
NSGA-II(Non-dominated Sorting Genetic Algorithm II,非支配排序遗传算法 II)是一种常用于多目标优化的遗传算法。下面是一个基于 NSGA-II、使用 DEAP 和 scikit-learn 实现特征选择的 Python 代码示例:
import numpy as np
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from deap import base, creator, tools, algorithms
# Load the dataset: every column except the last is a feature, the last is the label
data = np.loadtxt('data.csv', delimiter=',')
X, y = data[:, :-1], data[:, -1]
# Fitness function for feature selection
def evaluate(individual):
    """Score a feature mask by hold-out SVM accuracy and subset size.

    Args:
        individual: Sequence of 0/1 flags, one per column of the
            module-level ``X``; truthy entries mark the selected columns.

    Returns:
        A ``(accuracy, n_selected)`` tuple, where ``accuracy`` is measured on
        a fixed 20% hold-out split and ``n_selected`` is the number of
        selected columns. A mask that selects nothing scores ``(0.0, 0)``.
    """
    # Feature selection
    selected_features = [index for index, value in enumerate(individual) if value]
    if not selected_features:
        # An all-zero mask would produce a zero-column matrix, which the
        # scaler and the classifier both reject; score it as the worst
        # possible accuracy instead of aborting the whole run.
        return 0.0, 0
    X_new = X[:, selected_features]

    # Split before scaling so that the scaler never sees the test samples
    # (fitting it on the full data would leak test-set statistics).
    X_train, X_test, y_train, y_test = train_test_split(
        X_new, y, test_size=0.2, random_state=42
    )

    # Normalisation: fit on the training part only, then apply to both parts
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Model training and prediction
    model = SVC()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Accuracy on the hold-out split
    accuracy = accuracy_score(y_test, y_pred)

    # Return the accuracy and the number of selected features
    return accuracy, len(selected_features)
# Fitness, individual and population definitions.
# The objectives returned by `evaluate` are (accuracy, number of selected
# features): accuracy is maximised (+1.0) and the feature count is minimised
# (-1.0). The class keeps the name 'FitnessMax' so existing references to
# creator.FitnessMax continue to work.
creator.create('FitnessMax', base.Fitness, weights=(1.0, -1.0))
creator.create('Individual', list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Each gene is a 0/1 flag (the upper bound of np.random.randint is exclusive)
toolbox.register('attr_bool', np.random.randint, 0, 2)
# One gene per feature column of X
toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.attr_bool, n=X.shape[1])
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
toolbox.register('evaluate', evaluate)
toolbox.register('mate', tools.cxOnePoint)
toolbox.register('mutate', tools.mutFlipBit, indpb=0.05)
toolbox.register('select', tools.selNSGA2)
# Run the genetic algorithm
population_size = 100
num_generations = 50
population = toolbox.population(n=population_size)

# The parents take part in survivor selection, so they need a fitness too
for ind, fit in zip(population, toolbox.map(toolbox.evaluate, population)):
    ind.fitness.values = fit

for generation in range(num_generations):
    # varAnd works on clones and invalidates the fitness of every individual
    # it changed by crossover or mutation
    offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.1)

    # Only the changed individuals need to be evaluated again
    invalid = [ind for ind in offspring if not ind.fitness.valid]
    for ind, fit in zip(invalid, toolbox.map(toolbox.evaluate, invalid)):
        ind.fitness.values = fit

    # Elitist survivor selection: choose the next generation from parents and
    # offspring together. Selecting population_size individuals out of the
    # population_size offspring alone would keep all of them and discard
    # every parent, i.e. apply no selection pressure at all.
    population = toolbox.select(population + offspring, k=population_size)

# Print the final result
best_individual = tools.selBest(population, k=1)[0]
selected_features = [index for index, value in enumerate(best_individual) if value]
print('Selected Features:', selected_features)
print('Number of Selected Features:', len(selected_features))
NSGA-II(Non-dominated Sorting Genetic Algorithm II)是一种多目标优化算法,用于解决具有多个目标函数的优化问题。这里所说的“特征提取”实际上是特征选择,即从原始特征中选出最具代表性和区分性的特征子集。下面是一个使用 Python 实现 NSGA-II 特征选择的代码示例:
import numpy as np
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
def nsga2_feature_extraction(X, y, n_features):
    """Select a feature subset with an NSGA-II style evolutionary search.

    Pipeline:
      1. Min-max scale ``X``.
      2. Keep the ``n_features`` columns with the best ANOVA F-score
         (``SelectKBest``) as the candidate pool.
      3. Evolve real-coded individuals in ``[0, 1]``, one gene per candidate;
         a candidate is switched on when its gene is above 0.5.
      4. Minimise two objectives per individual: the negated silhouette score
         of the switched-on columns with respect to ``y``, and the number of
         switched-on columns.
      5. From the final first front, return the subset with the best
         silhouette score (ties go to the smaller subset).

    No PCA projection is used: principal components are mixtures of all
    columns and cannot be mapped back to individual column indices.

    Args:
        X: 2-D array of samples by features.
        y: Class label of each sample.
        n_features: Size of the candidate pool, and therefore the maximum
            number of features returned.

    Returns:
        Sorted list of column indices of ``X`` (between 1 and ``n_features``
        entries).

    Raises:
        ValueError: Propagated from scikit-learn, e.g. when ``n_features``
            exceeds the number of columns or ``y`` holds fewer than two
            classes.
    """
    population_size = 100
    n_generations = 100
    mutation_rate = 0.01

    # Normalisation
    X_scaled = MinMaxScaler().fit_transform(X)

    # Univariate pre-selection of the candidate columns
    selector = SelectKBest(score_func=f_classif, k=n_features)
    X_selected = selector.fit_transform(X_scaled, y)
    candidate_indices = selector.get_support(indices=True)
    n_genes = X_selected.shape[1]

    # Many individuals decode to the same mask; score each mask only once.
    objective_cache = {}

    # Fitness computation
    def calculate_fitness(population):
        """Return an (n_individuals, 2) array of objectives to minimise."""
        objectives = np.empty((len(population), 2))
        for row, genes in enumerate(population):
            mask = genes > 0.5
            key = tuple(mask.tolist())
            if key not in objective_cache:
                if mask.any():
                    score = silhouette_score(X_selected[:, mask], y)
                    objective_cache[key] = (-score, float(mask.sum()))
                else:
                    # An empty subset cannot be scored; make it worse than
                    # every real subset so that it is always dominated.
                    objective_cache[key] = (np.inf, np.inf)
            objectives[row] = objective_cache[key]
        return objectives

    # Non-dominated sorting
    def non_dominated_sort(fitness_values):
        """Split row indices into Pareto fronts, best front first."""
        no_worse = np.all(
            fitness_values[:, None, :] <= fitness_values[None, :, :], axis=2
        )
        better = np.any(
            fitness_values[:, None, :] < fitness_values[None, :, :], axis=2
        )
        dominates = no_worse & better  # dominates[i, j]: i dominates j
        domination_count = dominates.sum(axis=0)
        remaining = np.ones(len(fitness_values), dtype=bool)
        fronts = []
        while remaining.any():
            front = np.flatnonzero(remaining & (domination_count == 0))
            fronts.append(front)
            remaining[front] = False
            # Individuals dominated only by this front become the next front
            domination_count = domination_count - dominates[front].sum(axis=0)
        return fronts

    def crowding_distance(front_fitness):
        """Return the crowding distance of every member of one front."""
        size, n_objectives = front_fitness.shape
        distance = np.zeros(size)
        if size <= 2:
            distance[:] = np.inf
            return distance
        for m in range(n_objectives):
            order = np.argsort(front_fitness[:, m])
            values = front_fitness[order, m]
            # Boundary solutions are always kept
            distance[order[0]] = np.inf
            distance[order[-1]] = np.inf
            if not np.all(np.isfinite(values)):
                continue
            span = values[-1] - values[0]
            if span > 0:
                distance[order[1:-1]] += (values[2:] - values[:-2]) / span
        return distance

    # Roulette-wheel selection operator
    def roulette_wheel_selection(population, weights, size):
        """Draw ``size`` parents with probability proportional to ``weights``."""
        probabilities = weights / np.sum(weights)
        chosen = np.random.choice(len(population), size=size, p=probabilities)
        return population[chosen]

    # Initial population
    population = np.random.rand(population_size, n_genes)
    fitness_values = calculate_fitness(population)

    # Evolution loop
    for _ in range(n_generations):
        # Rank-based weights are always positive, unlike the raw objectives,
        # so they form a valid probability distribution for the roulette.
        ranks = np.empty(population_size)
        for rank, front in enumerate(non_dominated_sort(fitness_values)):
            ranks[front] = rank
        weights = 1.0 / (1.0 + ranks)

        # Uniform crossover: every gene goes to one child or the other
        n_pairs = (population_size + 1) // 2
        parents_a = roulette_wheel_selection(population, weights, n_pairs)
        parents_b = roulette_wheel_selection(population, weights, n_pairs)
        take_first = np.random.rand(n_pairs, n_genes) < 0.5
        offspring = np.vstack([
            np.where(take_first, parents_a, parents_b),
            np.where(take_first, parents_b, parents_a),
        ])[:population_size]

        # Mutation: redraw a gene with a small probability
        mutated = np.random.rand(*offspring.shape) < mutation_rate
        offspring[mutated] = np.random.rand(int(mutated.sum()))

        # Elitist survivor selection over parents and offspring: fill front
        # by front, and cut the last front by descending crowding distance.
        combined = np.vstack([population, offspring])
        combined_fitness = np.vstack(
            [fitness_values, calculate_fitness(offspring)]
        )
        survivors = []
        for front in non_dominated_sort(combined_fitness):
            room = population_size - len(survivors)
            if room <= 0:
                break
            if len(front) > room:
                order = np.argsort(
                    -crowding_distance(combined_fitness[front]), kind="stable"
                )
                front = front[order[:room]]
            survivors.extend(front.tolist())
        population = combined[survivors]
        fitness_values = combined_fitness[survivors]

    # Final choice: best silhouette on the first front, then fewest features
    first_front = non_dominated_sort(fitness_values)[0]
    order = np.lexsort(
        (fitness_values[first_front, 1], fitness_values[first_front, 0])
    )
    best_mask = population[first_front[order[0]]] > 0.5
    if not best_mask.any():
        # Only possible when every individual decoded to the empty subset
        return candidate_indices.tolist()

    # Map the mask back to column indices of the original X
    return candidate_indices[best_mask].tolist()
# Usage example: 100 random samples with 10 features each, labels drawn from 2 classes
n_features = 5  # request 5 features
X, y = np.random.rand(100, 10), np.random.randint(0, 2, 100)
selected_features = nsga2_feature_extraction(X, y, n_features)
print("Selected features:", selected_features)