带截距的多元稳健回归的 C++ 完整正确实现及案例
时间: 2024-03-12 07:44:27 浏览: 60
以下是带截距的多元稳健回归的C++实现代码及案例:
```cpp
#include <algorithm>
#include <cmath>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <vector>
using namespace std;
// Returns the median of the values in v: the middle element of the sorted
// values, or the mean of the two middle elements when the count is even.
// Returns NaN when v is empty, because an empty sample has no median.
// v is taken by value so that sorting never reorders the caller's data.
double median(std::vector<double> v) {
    if (v.empty()) {
        // Without this guard the even-count branch would read v[-1].
        return std::nan("");
    }
    std::sort(v.begin(), v.end());
    const std::size_t mid = v.size() / 2;
    if (v.size() % 2 == 0) {
        return (v[mid - 1] + v[mid]) / 2.0;
    }
    return v[mid];
}
// Median absolute deviation: the median of the absolute distances between
// each value in v and the median of v. The result is returned unscaled.
double MAD(std::vector<double> v) {
    const double center = median(v);

    // Collect |x - center| for every sample, in the original order.
    std::vector<double> deviations;
    deviations.reserve(v.size());
    for (const double value : v) {
        deviations.push_back(std::abs(value - center));
    }

    return median(deviations);
}
// Returns the weighted mean sum(v[i] * w[i]) / sum(w[i]).
// Returns NaN when v and w differ in length (pairing the elements would read
// past the end of the shorter vector) or when the weights sum to zero, since
// the weighted mean is undefined in both cases. Two empty vectors therefore
// also yield NaN.
double weighted_mean(std::vector<double> v, std::vector<double> w) {
    if (v.size() != w.size()) {
        return std::nan("");
    }
    const double sum_w = std::accumulate(w.begin(), w.end(), 0.0);
    if (sum_w == 0.0) {
        return std::nan("");
    }
    // inner_product accumulates v[i] * w[i] directly, so no temporary vector
    // of products is needed.
    const double sum_v_w = std::inner_product(v.begin(), v.end(), w.begin(), 0.0);
    return sum_v_w / sum_w;
}
// Solves the symmetric positive semi-definite system a * beta = b that arises
// from weighted normal equations, and returns beta.
// Elimination proceeds along the diagonal without row exchanges. When a pivot
// has shrunk to (numerically) nothing relative to its original diagonal entry,
// the corresponding column is a linear combination of earlier columns (or is
// constant); its coefficient is fixed at 0 instead of dividing by ~0.
static std::vector<double> solve_normal_equations(std::vector<std::vector<double>> a,
                                                  std::vector<double> b) {
    constexpr double kPivotTolerance = 1e-10;
    const std::size_t p = b.size();

    std::vector<double> original_diagonal(p);
    for (std::size_t j = 0; j < p; ++j) {
        original_diagonal[j] = a[j][j];
    }

    // Forward elimination to upper-triangular form.
    std::vector<char> is_active(p, 1);
    for (std::size_t k = 0; k < p; ++k) {
        // Written as !(x > y) so that a NaN pivot is also treated as unusable.
        if (!(a[k][k] > kPivotTolerance * original_diagonal[k])) {
            is_active[k] = 0;
            continue;
        }
        for (std::size_t i = k + 1; i < p; ++i) {
            const double factor = a[i][k] / a[k][k];
            if (factor == 0.0) {
                continue;
            }
            for (std::size_t j = k; j < p; ++j) {
                a[i][j] -= factor * a[k][j];
            }
            b[i] -= factor * b[k];
        }
    }

    // Back substitution; dropped columns keep the coefficient 0.
    std::vector<double> beta(p, 0.0);
    for (std::size_t k = p; k-- > 0;) {
        if (!is_active[k]) {
            continue;
        }
        double rhs = b[k];
        for (std::size_t j = k + 1; j < p; ++j) {
            rhs -= a[k][j] * beta[j];
        }
        beta[k] = rhs / a[k][k];
    }
    return beta;
}

// Fits the linear model y ~ intercept + sum_j coef[j] * X[i][j] with a Huber
// M-estimator computed by iteratively reweighted least squares (IRLS).
//
// Parameters:
//   X  n observations (rows), each holding the same number p of predictors.
//   y  n responses, one per row of X.
// Returns:
//   A vector of p + 1 values: the p slope coefficients in column order,
//   followed by the intercept as the last element.
// Throws:
//   std::invalid_argument when X is empty, when y.size() != X.size(), or when
//   the rows of X do not all have the same length.
//
// Each iteration solves a weighted least-squares problem, estimates the
// residual scale as 1.4826 * MAD(residuals), and assigns every observation
// the Huber weight min(1, c * scale / |residual|), so observations with large
// residuals pull less on the next fit. Predictors that are constant or
// linearly dependent on earlier columns receive the coefficient 0.
std::vector<double> robust_regression(std::vector<std::vector<double>> X, std::vector<double> y) {
    const std::size_t n = X.size();
    if (n == 0) {
        throw std::invalid_argument("robust_regression: X must contain at least one observation");
    }
    if (y.size() != n) {
        throw std::invalid_argument("robust_regression: X and y must have the same number of rows");
    }
    const std::size_t p = X[0].size();
    for (const std::vector<double>& row : X) {
        if (row.size() != p) {
            throw std::invalid_argument("robust_regression: all rows of X must have the same length");
        }
    }

    // Scales the MAD so that it estimates the standard deviation of normally
    // distributed residuals.
    constexpr double kMadToSigma = 1.4826;
    // Customary Huber tuning constant.
    constexpr double kHuberTuning = 1.345;
    constexpr int kMaxIterations = 50;
    constexpr double kCoefTolerance = 1e-10;
    // A scale below this fraction of max|y| is treated as an exact fit.
    constexpr double kRelativeZeroScale = 1e-12;

    // Column-major copy of X so each predictor can be handed to weighted_mean.
    std::vector<std::vector<double>> columns(p, std::vector<double>(n));
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = 0; j < p; ++j) {
            columns[j][i] = X[i][j];
        }
    }

    double y_magnitude = 0.0;
    for (const double value : y) {
        y_magnitude = std::max(y_magnitude, std::abs(value));
    }

    std::vector<double> w(n, 1.0);          // unit weights: the first pass is ordinary least squares
    std::vector<double> coef(p + 1, 0.0);   // slopes followed by the intercept
    std::vector<double> residuals(n);

    for (int iter = 0; iter < kMaxIterations; ++iter) {
        // Centre every variable on its weighted mean. The intercept then drops
        // out of the normal equations (and is recovered afterwards), which
        // also keeps the system better conditioned.
        std::vector<double> x_bar(p);
        for (std::size_t j = 0; j < p; ++j) {
            x_bar[j] = weighted_mean(columns[j], w);
        }
        const double y_bar = weighted_mean(y, w);

        // Weighted normal equations (Xc' W Xc) * slopes = Xc' W yc.
        std::vector<std::vector<double>> a(p, std::vector<double>(p, 0.0));
        std::vector<double> b(p, 0.0);
        for (std::size_t i = 0; i < n; ++i) {
            for (std::size_t j = 0; j < p; ++j) {
                const double weighted_xj = w[i] * (X[i][j] - x_bar[j]);
                b[j] += weighted_xj * (y[i] - y_bar);
                for (std::size_t k = j; k < p; ++k) {
                    a[j][k] += weighted_xj * (X[i][k] - x_bar[k]);
                }
            }
        }
        // Only the upper triangle was accumulated; mirror it.
        for (std::size_t j = 0; j < p; ++j) {
            for (std::size_t k = 0; k < j; ++k) {
                a[j][k] = a[k][j];
            }
        }

        std::vector<double> next = solve_normal_equations(a, b);
        double intercept = y_bar;
        for (std::size_t j = 0; j < p; ++j) {
            intercept -= next[j] * x_bar[j];
        }
        next.push_back(intercept);

        // Largest coefficient change, judged relative to the coefficient size.
        double max_change = 0.0;
        double max_magnitude = 1.0;
        for (std::size_t j = 0; j <= p; ++j) {
            max_change = std::max(max_change, std::abs(next[j] - coef[j]));
            max_magnitude = std::max(max_magnitude, std::abs(next[j]));
        }
        coef = next;
        // iter == 0 only compares against the all-zero starting vector.
        if (iter > 0 && max_change <= kCoefTolerance * max_magnitude) {
            break;
        }

        for (std::size_t i = 0; i < n; ++i) {
            double predicted = coef[p];
            for (std::size_t j = 0; j < p; ++j) {
                predicted += coef[j] * X[i][j];
            }
            residuals[i] = y[i] - predicted;
        }

        const double sigma = kMadToSigma * MAD(residuals);
        // A vanishing (or NaN) scale leaves nothing to reweight against: at
        // least half of the residuals coincide, so keep the current fit.
        if (!(sigma > kRelativeZeroScale * y_magnitude)) {
            break;
        }

        const double cutoff = kHuberTuning * sigma;
        for (std::size_t i = 0; i < n; ++i) {
            const double abs_residual = std::abs(residuals[i]);
            w[i] = (abs_residual <= cutoff) ? 1.0 : cutoff / abs_residual;
        }
    }

    return coef;
}
// Demo: fits a robust regression to a small hand-made sample and prints the
// slope coefficients followed by the intercept.
int main() {
    // Responses follow y = 1 + 2*x1 + 3*x2, except the fifth one, which the
    // formula puts at 35 and which is replaced by 80 to act as an outlier.
    // The two predictor columns are deliberately not collinear, so both
    // slopes are identifiable.
    const std::vector<std::vector<double>> X = {{1, 2}, {2, 1}, {3, 5}, {4, 3},
                                                {5, 8}, {6, 4}, {7, 9}, {8, 6}};
    const std::vector<double> y = {9, 8, 22, 18, 80, 25, 42, 35};

    // robust_regression returns the slopes first and the intercept last.
    const std::vector<double> coef = robust_regression(X, y);

    const std::size_t p = X[0].size();
    std::cout << "Coefficients: ";
    for (std::size_t j = 0; j < p; j++) {
        std::cout << coef[j] << " ";
    }
    std::cout << '\n';
    std::cout << "Intercept: " << coef[p] << '\n';
    return 0;
}
```
此代码实现了带截距的多元稳健回归,使用了中位数、绝对偏差中位数(MAD)和加权平均值等数学方法。示例数据是 main 函数中手工构造的小样本,并非来自 scikit-learn 的 diabetes 数据集。
阅读全文