多元最小二乘回归:带截距的 C++ 类实现及案例
时间: 2024-02-20 14:56:26 浏览: 74
以下是一个简单的多元最小二乘回归的C++实现,包括截距。我们将使用一个类来实现这个回归。
```c++
#include <cmath>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <utility>
#include <vector>
using namespace std;
/// Ordinary least-squares multiple linear regression with an intercept:
///     y ~ b0 + beta[0] * x[0] + ... + beta[k-1] * x[k-1]
///
/// The model is fitted on mean-centred data, which is algebraically the same
/// as adding a column of ones to the design matrix but keeps the intercept out
/// of the linear system that has to be solved.
class MultipleLinearRegression {
private:
    std::vector<std::vector<double>> x_; // predictors, one row per sample
    std::vector<double> y_;              // response, one value per sample
    std::size_t n_;                      // number of samples
    std::size_t k_;                      // number of predictors per sample
    std::vector<double> beta_;           // fitted slopes (empty until fit() succeeds)
    double b0_ = 0.0;                    // fitted intercept
    double r_squared_ = 0.0;             // coefficient of determination of the last fit
    bool fitted_ = false;                // true once fit() has produced a solution

    /// Solves a * solution = b by Gaussian elimination with partial pivoting.
    /// `a` and `b` are overwritten. Returns false when a pivot is (numerically)
    /// zero, i.e. the system has no unique solution.
    static bool solve_linear_system(std::vector<std::vector<double>>& a,
                                    std::vector<double>& b,
                                    std::vector<double>& solution) {
        const std::size_t m = b.size();

        // Pivots are judged relative to the largest diagonal entry so that the
        // singularity test does not depend on the units of the data.
        double scale = 0.0;
        for (std::size_t i = 0; i < m; ++i) {
            const double magnitude = std::fabs(a[i][i]);
            if (magnitude > scale) {
                scale = magnitude;
            }
        }
        const double tolerance = scale * 1e-12;

        for (std::size_t col = 0; col < m; ++col) {
            std::size_t pivot = col;
            for (std::size_t row = col + 1; row < m; ++row) {
                if (std::fabs(a[row][col]) > std::fabs(a[pivot][col])) {
                    pivot = row;
                }
            }
            if (std::fabs(a[pivot][col]) <= tolerance) {
                return false;
            }
            if (pivot != col) {
                std::swap(a[col], a[pivot]);
                std::swap(b[col], b[pivot]);
            }
            for (std::size_t row = col + 1; row < m; ++row) {
                const double ratio = a[row][col] / a[col][col];
                for (std::size_t c = col; c < m; ++c) {
                    a[row][c] -= ratio * a[col][c];
                }
                b[row] -= ratio * b[col];
            }
        }

        // Back substitution on the upper-triangular system.
        solution.assign(m, 0.0);
        for (std::size_t i = m; i-- > 0;) {
            double acc = b[i];
            for (std::size_t j = i + 1; j < m; ++j) {
                acc -= a[i][j] * solution[j];
            }
            solution[i] = acc / a[i][i];
        }
        return true;
    }

public:
    /// @param x  Predictor matrix, one row per sample; all rows must have the
    ///           same length.
    /// @param y  Response vector; must have one entry per row of `x`.
    /// @throws std::invalid_argument if the rows of `x` differ in length or if
    ///         `y.size() != x.size()`.
    MultipleLinearRegression(std::vector<std::vector<double>> x, std::vector<double> y)
        : x_(std::move(x)),
          y_(std::move(y)),
          n_(x_.size()),
          k_(x_.empty() ? 0 : x_[0].size()) {
        if (y_.size() != n_) {
            throw std::invalid_argument(
                "MultipleLinearRegression: x and y must have the same number of samples");
        }
        for (const auto& row : x_) {
            if (row.size() != k_) {
                throw std::invalid_argument(
                    "MultipleLinearRegression: every row of x must have the same length");
            }
        }
    }

    /// Fits the model and prints the slopes, the intercept and R^2 to stdout.
    ///
    /// If the model cannot be determined (fewer than k + 1 samples, or
    /// predictors that are constant or linearly dependent once the intercept
    /// is accounted for) a diagnostic is written to stderr, nothing is printed
    /// to stdout and is_fitted() returns false.
    void fit() {
        fitted_ = false;

        // k slopes plus one intercept need at least k + 1 samples.
        if (n_ < k_ + 1) {
            std::cerr << "MultipleLinearRegression::fit: need at least " << (k_ + 1)
                      << " samples, got " << n_ << '\n';
            return;
        }

        // Column means of the predictors and mean of the response.
        std::vector<double> x_mean(k_, 0.0);
        double y_mean = 0.0;
        for (std::size_t i = 0; i < n_; ++i) {
            for (std::size_t j = 0; j < k_; ++j) {
                x_mean[j] += x_[i][j];
            }
            y_mean += y_[i];
        }
        const double samples = static_cast<double>(n_);
        for (std::size_t j = 0; j < k_; ++j) {
            x_mean[j] /= samples;
        }
        y_mean /= samples;

        // Normal equations on centred data: (Xc^T Xc) beta = Xc^T yc.
        std::vector<std::vector<double>> gram(k_, std::vector<double>(k_, 0.0));
        std::vector<double> moment(k_, 0.0);
        for (std::size_t i = 0; i < n_; ++i) {
            const double dy = y_[i] - y_mean;
            for (std::size_t j = 0; j < k_; ++j) {
                const double dj = x_[i][j] - x_mean[j];
                moment[j] += dj * dy;
                for (std::size_t l = j; l < k_; ++l) {
                    gram[j][l] += dj * (x_[i][l] - x_mean[l]);
                }
            }
        }
        // Only the upper triangle was accumulated; mirror it.
        for (std::size_t j = 0; j < k_; ++j) {
            for (std::size_t l = 0; l < j; ++l) {
                gram[j][l] = gram[l][j];
            }
        }

        std::vector<double> solution;
        if (!solve_linear_system(gram, moment, solution)) {
            std::cerr << "MultipleLinearRegression::fit: predictors are constant or "
                         "linearly dependent; the coefficients are not unique\n";
            return;
        }
        beta_ = std::move(solution);

        // The fitted plane passes through the point of means.
        b0_ = y_mean;
        for (std::size_t j = 0; j < k_; ++j) {
            b0_ -= beta_[j] * x_mean[j];
        }

        // R^2 = 1 - SSE / SST.
        double sse = 0.0;
        double sst = 0.0;
        for (std::size_t i = 0; i < n_; ++i) {
            double y_pred = b0_;
            for (std::size_t j = 0; j < k_; ++j) {
                y_pred += beta_[j] * x_[i][j];
            }
            const double residual = y_[i] - y_pred;
            const double deviation = y_[i] - y_mean;
            sse += residual * residual;
            sst += deviation * deviation;
        }
        // A constant response has no variance to explain; report 1 by
        // convention instead of dividing by zero.
        r_squared_ = sst > 0.0 ? 1.0 - sse / sst : 1.0;
        fitted_ = true;

        std::cout << "系数:";
        for (std::size_t j = 0; j < k_; ++j) {
            std::cout << beta_[j] << " ";
        }
        std::cout << '\n' << "截距:" << b0_ << '\n';
        std::cout << "R²:" << r_squared_ << '\n';
    }

    /// True once fit() has succeeded.
    bool is_fitted() const { return fitted_; }

    /// Fitted slopes, in predictor order; empty before a successful fit().
    const std::vector<double>& coefficients() const { return beta_; }

    /// Fitted intercept; 0 before a successful fit().
    double intercept() const { return b0_; }

    /// Coefficient of determination of the last successful fit; 0 before one.
    double r_squared() const { return r_squared_; }
};
// Example: fit a two-predictor model on five samples and print the result.
int main() {
    // The two predictors are deliberately not affinely related
    // (x2 != a * x1 + b), so a model with an intercept has a unique
    // least-squares solution. The response is roughly 1 + 2*x1 + 3*x2.
    std::vector<std::vector<double>> x{{1, 2}, {2, 1}, {3, 5}, {4, 3}, {5, 8}};
    std::vector<double> y{9.1, 7.9, 22.2, 17.8, 35.1};
    MultipleLinearRegression mlr(x, y);
    mlr.fit();
    return 0;
}
```
在这个实现中,我们通过求解正规方程 $A\beta = b$ 得到回归系数,其中 $A = X^\top X$ 是自变量矩阵的转置乘自变量矩阵,$b = X^\top y$ 是自变量矩阵的转置乘因变量向量,$\beta$ 是待求的系数向量。我们使用带列主元的高斯消元法来解这个方程组。
最后,我们计算回归系数和截距,并计算 $R^2$ 来评估回归模型的拟合程度。注意,带截距模型的截距应按 $b_0 = \bar{y} - \sum_{j} \beta_j \bar{x}_j$ 计算(等价于在设计矩阵中加入一列全 1 后一并求解),需要用到所有自变量的均值,而不仅仅是第一个自变量的均值。
阅读全文