逐步回归分析的代码以及示例
时间: 2023-08-06 22:17:34 浏览: 203
以下是Python中使用逐步回归分析进行变量筛选的示例代码:
```python
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Load the data set from a CSV file in the working directory.
data = pd.read_csv('data.csv')
# Split into the predictor columns (everything except 'target') and the
# response column.
# NOTE(review): x and y are not referenced by the code visible in this
# snippet (the selection functions receive the full `data` frame) — confirm
# whether they are still needed.
x = data.drop(columns=['target'])
y = data['target']
# 前向选择
def forward_select(data, target):
    """Select OLS predictors by greedy forward selection on AIC.

    Starting from no predictors, each round fits one model per remaining
    candidate column (current selection plus that candidate) and keeps the
    candidate that gives the lowest AIC. The search stops as soon as the
    best candidate no longer lowers the AIC.

    The starting score is +inf, so when at least one candidate column exists
    the best single predictor is always accepted in the first round.

    Args:
        data: pandas DataFrame holding the response and candidate predictors.
            Column names are inserted verbatim into the model formula, so
            they are assumed to be valid formula identifiers.
        target: Name of the response column in ``data``.

    Returns:
        The fitted statsmodels OLS results for ``target ~ <selected columns>``
        (intercept-only when ``data`` has no column other than ``target``).
    """

    def fit(terms):
        # An empty right-hand side is not a valid formula; "1" requests an
        # intercept-only model instead.
        rhs = ' + '.join(terms) if terms else '1'
        formula = "{} ~ {}".format(target, rhs)
        return sm.OLS.from_formula(formula, data).fit()

    # The response must not be offered as its own predictor: regressing the
    # target on itself is a perfect fit and would always win.
    remaining = [column for column in data.columns if column != target]
    selected = []
    current_score = float('inf')

    while remaining:
        # Ties on AIC are broken by column name (tuple comparison).
        best_new_score, best_candidate = min(
            (fit(selected + [candidate]).aic, candidate)
            for candidate in remaining
        )
        # Stop on "no strict improvement" (this also covers a NaN score and
        # an exact tie, which would otherwise repeat the same round forever).
        if not best_new_score < current_score:
            break
        current_score = best_new_score
        selected.append(best_candidate)
        remaining.remove(best_candidate)

    return fit(selected)
# 后向消元
def backward_eliminate(data, target):
    """Select OLS predictors by greedy backward elimination on AIC.

    Starting from the model that uses every column except ``target``, each
    round fits one model per currently selected column (the selection without
    that column) and drops the column whose removal gives the lowest AIC.
    The search stops as soon as no removal lowers the AIC below that of the
    current model.

    Args:
        data: pandas DataFrame holding the response and candidate predictors.
            Column names are inserted verbatim into the model formula, so
            they are assumed to be valid formula identifiers.
        target: Name of the response column in ``data``.

    Returns:
        The fitted statsmodels OLS results for ``target ~ <kept columns>``
        (intercept-only if every predictor was eliminated).
    """

    def fit(terms):
        # An empty right-hand side is not a valid formula; "1" requests an
        # intercept-only model instead.
        rhs = ' + '.join(terms) if terms else '1'
        formula = "{} ~ {}".format(target, rhs)
        return sm.OLS.from_formula(formula, data).fit()

    # The response must not appear on the right-hand side of its own model.
    selected = [column for column in data.columns if column != target]
    # Baseline is the full model, so a column is dropped only when doing so
    # actually improves on it.
    current_score = fit(selected).aic

    while selected:
        # Build each reduced term list as a new list: list.remove() mutates
        # in place and returns None, so it cannot be used inside join().
        best_new_score, best_candidate = min(
            (fit([term for term in selected if term != candidate]).aic,
             candidate)
            for candidate in selected
        )
        if not best_new_score < current_score:
            break
        current_score = best_new_score
        selected.remove(best_candidate)

    return fit(selected)
# 双向搜索
def stepwise_selection(data, target):
    """Select OLS predictors by bidirectional stepwise search on AIC.

    Starting from the intercept-only model, each round evaluates every
    single-column addition (from the columns not yet selected) and every
    single-column removal (from the selected columns). The move with the
    lowest AIC is applied if it strictly lowers the current AIC; a removal
    wins over an addition when their scores are equal. The search stops when
    neither kind of move improves the model. Because the AIC strictly
    decreases on every applied move, the loop always terminates.

    Args:
        data: pandas DataFrame holding the response and candidate predictors.
            Column names are inserted verbatim into the model formula, so
            they are assumed to be valid formula identifiers.
        target: Name of the response column in ``data``.

    Returns:
        The fitted statsmodels OLS results for ``target ~ <selected columns>``
        (intercept-only if no predictor improves the AIC).
    """

    def fit(terms):
        # An empty right-hand side is not a valid formula; "1" requests an
        # intercept-only model instead.
        rhs = ' + '.join(terms) if terms else '1'
        formula = "{} ~ {}".format(target, rhs)
        return sm.OLS.from_formula(formula, data).fit()

    no_move = (float('inf'), None)
    # The response must not be offered as its own predictor.
    remaining = [column for column in data.columns if column != target]
    selected = []
    current_score = fit(selected).aic

    while True:
        # `default` covers the rounds where one direction has no candidates
        # (nothing selected yet, or every column already selected).
        best_forward_score, best_forward_candidate = min(
            ((fit(selected + [candidate]).aic, candidate)
             for candidate in remaining),
            default=no_move,
        )
        # Build each reduced term list as a new list: list.remove() mutates
        # in place and returns None, so it cannot be used inside join().
        best_backward_score, best_backward_candidate = min(
            ((fit([term for term in selected if term != candidate]).aic,
              candidate)
             for candidate in selected),
            default=no_move,
        )

        if (best_forward_score < best_backward_score
                and best_forward_score < current_score):
            current_score = best_forward_score
            selected.append(best_forward_candidate)
            remaining.remove(best_forward_candidate)
        elif best_backward_score < current_score:
            current_score = best_backward_score
            selected.remove(best_backward_candidate)
            remaining.append(best_backward_candidate)
        else:
            # Neither adding nor dropping a column improves the AIC.
            break

    return fit(selected)
# Forward selection example: fit on the loaded data with 'target' as the
# response column and print the regression summary.
forward_select_result = forward_select(data, 'target')
print(forward_select_result.summary())
# Backward elimination example.
backward_eliminate_result = backward_eliminate(data, 'target')
print(backward_eliminate_result.summary())
# Bidirectional (stepwise) search example.
stepwise_selection_result = stepwise_selection(data, 'target')
print(stepwise_selection_result.summary())
```
这里使用了statsmodels库进行线性回归分析,并以AIC(赤池信息量准则,数值越小表示模型越优)作为变量筛选的评价标准。其中forward_select()、backward_eliminate()和stepwise_selection()分别实现了前向选择、后向消元和双向搜索。每个函数的输入为数据集和目标变量名称,输出为线性回归模型的拟合结果。在示例中,分别用三种方法对数据集进行了变量筛选,并输出了各自的结果摘要。
阅读全文