请给出带截距的多元分位数回归(自变量为多维)的 C++ 完整类实现及使用案例
时间: 2024-04-24 11:22:13 浏览: 72
美赛-美赛常用模型之多元回归分析-Python实现.zip
以下是带截距的多元分位数回归的 C++ 类实现及案例代码:
```cpp
#include <algorithm> // for sort
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <utility>
#include <vector>
using namespace std;
/**
 * Linear quantile regression fitted independently for several quantile levels.
 *
 * For each level tau the coefficients approximately minimise the pinball
 * (check) loss  sum_i rho_tau(y_i - x_i . beta),  where
 * rho_tau(r) = r * (tau - [r < 0]).
 *
 * Intercept convention: the design matrix is used exactly as supplied, so a
 * model with an intercept is obtained by passing a constant column of ones
 * in X (and in the matrix given to predict()).
 *
 * The minimisation uses a smoothed majorise-minimise iteration: every step is
 * a weighted least-squares solve with weights 1 / (epsilon + |residual|).
 * The result is therefore an approximation whose accuracy is governed by the
 * small smoothing constant epsilon. Because each level is fitted on its own,
 * predicted quantiles are not forced to be ordered.
 */
class QuantileRegression {
private:
    int n; // number of observations (rows of X)
    int p; // number of design-matrix columns, intercept column included
    int q; // number of quantile levels
    std::vector<std::vector<double>> X;    // design matrix, n x p
    std::vector<double> Y;                 // response vector, length n
    std::vector<double> tau;               // quantile levels, each strictly inside (0, 1)
    std::vector<std::vector<double>> beta; // fitted coefficients, q x p (zeros until fit())

    // Inner product of two vectors; the caller guarantees b has at least a.size() entries.
    static double dot(const std::vector<double>& a, const std::vector<double>& b) {
        double sum = 0.0;
        for (std::size_t j = 0; j < a.size(); ++j) {
            sum += a[j] * b[j];
        }
        return sum;
    }

    // Solves the square system A x = b by Gaussian elimination with partial pivoting.
    // Throws std::runtime_error when a pivot is negligible relative to the matrix scale.
    static std::vector<double> solveLinearSystem(std::vector<std::vector<double>> A,
                                                 std::vector<double> b) {
        const std::size_t m = b.size();
        double scale = 0.0;
        for (const std::vector<double>& row : A) {
            for (double v : row) {
                scale = std::max(scale, std::fabs(v));
            }
        }
        for (std::size_t col = 0; col < m; ++col) {
            std::size_t pivot = col;
            for (std::size_t row = col + 1; row < m; ++row) {
                if (std::fabs(A[row][col]) > std::fabs(A[pivot][col])) {
                    pivot = row;
                }
            }
            // Written as !(a > b) so that an all-zero matrix or a NaN pivot is rejected too.
            if (!(std::fabs(A[pivot][col]) > 1e-13 * scale)) {
                throw std::runtime_error(
                    "QuantileRegression: design matrix is rank deficient");
            }
            std::swap(A[col], A[pivot]);
            std::swap(b[col], b[pivot]);
            for (std::size_t row = col + 1; row < m; ++row) {
                const double factor = A[row][col] / A[col][col];
                for (std::size_t k = col; k < m; ++k) {
                    A[row][k] -= factor * A[col][k];
                }
                b[row] -= factor * b[col];
            }
        }
        std::vector<double> x(m);
        for (std::size_t i = m; i-- > 0;) {
            double acc = b[i];
            for (std::size_t k = i + 1; k < m; ++k) {
                acc -= A[i][k] * x[k];
            }
            x[i] = acc / A[i][i];
        }
        return x;
    }

    // Solves the normal equations  X' W X beta = X' (W Y + shift * 1)
    // for the diagonal weight matrix W = diag(weights).
    std::vector<double> solveWeighted(const std::vector<double>& weights, double shift) const {
        const std::size_t cols = static_cast<std::size_t>(p);
        std::vector<std::vector<double>> gram(cols, std::vector<double>(cols, 0.0));
        std::vector<double> rhs(cols, 0.0);
        for (std::size_t i = 0; i < X.size(); ++i) {
            const std::vector<double>& row = X[i];
            const double target = weights[i] * Y[i] + shift;
            for (std::size_t a = 0; a < cols; ++a) {
                rhs[a] += row[a] * target;
                const double weighted = weights[i] * row[a];
                for (std::size_t b = a; b < cols; ++b) {
                    gram[a][b] += weighted * row[b];
                }
            }
        }
        // Only the upper triangle was accumulated; mirror it to the lower one.
        for (std::size_t a = 0; a < cols; ++a) {
            for (std::size_t b = 0; b < a; ++b) {
                gram[a][b] = gram[b][a];
            }
        }
        return solveLinearSystem(std::move(gram), std::move(rhs));
    }

    // Fits the coefficient vector for a single quantile level.
    //
    // Uses rho(r) = |r| / 2 + (level - 1/2) * r and majorises |r| at the current
    // residual r0 by the quadratic r^2 / (2 (eps + |r0|)) + const; minimising the
    // surrogate is the weighted least-squares problem solved by solveWeighted().
    std::vector<double> fitQuantile(double level) const {
        const double kEpsilon = 1e-6;    // smoothing constant, keeps the weights finite
        const double kTolerance = 1e-10; // relative change in coefficients that stops the loop
        const int kMaxIterations = 1000;

        const double shift = 2.0 * level - 1.0;
        std::vector<double> weights(X.size(), 1.0);
        // Ordinary least squares (unit weights, no shift) is the starting point.
        std::vector<double> coef = solveWeighted(weights, 0.0);
        for (int iter = 0; iter < kMaxIterations; ++iter) {
            for (std::size_t i = 0; i < X.size(); ++i) {
                const double residual = Y[i] - dot(X[i], coef);
                weights[i] = 1.0 / (kEpsilon + std::fabs(residual));
            }
            std::vector<double> next = solveWeighted(weights, shift);
            double change = 0.0;
            double magnitude = 0.0;
            for (std::size_t j = 0; j < next.size(); ++j) {
                change = std::max(change, std::fabs(next[j] - coef[j]));
                magnitude = std::max(magnitude, std::fabs(next[j]));
            }
            coef = std::move(next);
            if (change < kTolerance * (1.0 + magnitude)) {
                break;
            }
        }
        return coef;
    }

public:
    /**
     * @param X   design matrix, one row per observation; include a column of
     *            ones if the model should have an intercept
     * @param Y   response values, one per row of X
     * @param tau quantile levels to fit, each strictly between 0 and 1
     * @throws std::invalid_argument if X is empty or ragged, if Y does not have
     *         one entry per row of X, or if a level lies outside (0, 1)
     */
    QuantileRegression(std::vector<std::vector<double>> X, std::vector<double> Y,
                       std::vector<double> tau) {
        if (X.empty() || X[0].empty()) {
            throw std::invalid_argument(
                "QuantileRegression: X must have at least one row and one column");
        }
        for (const std::vector<double>& row : X) {
            if (row.size() != X[0].size()) {
                throw std::invalid_argument(
                    "QuantileRegression: all rows of X must have the same length");
            }
        }
        if (Y.size() != X.size()) {
            throw std::invalid_argument(
                "QuantileRegression: Y must have one entry per row of X");
        }
        for (double level : tau) {
            if (!(level > 0.0 && level < 1.0)) {
                throw std::invalid_argument(
                    "QuantileRegression: quantile levels must lie strictly between 0 and 1");
            }
        }
        this->n = static_cast<int>(X.size());
        this->p = static_cast<int>(X[0].size());
        this->q = static_cast<int>(tau.size());
        this->X = std::move(X);
        this->Y = std::move(Y);
        this->tau = std::move(tau);
        this->beta = std::vector<std::vector<double>>(
            static_cast<std::size_t>(q), std::vector<double>(static_cast<std::size_t>(p), 0.0));
    }

    /**
     * Estimates one coefficient vector per quantile level and stores it.
     * @throws std::runtime_error if the design matrix is rank deficient
     */
    void fit() {
        for (int k = 0; k < q; ++k) {
            beta[k] = fitQuantile(tau[k]);
        }
    }

    /**
     * Evaluates every fitted quantile at every row of Xnew.
     *
     * @param Xnew matrix with the same column layout as the training design
     *             matrix (including the ones column, if one was used)
     * @return q x m matrix; entry [k][i] is the prediction of quantile level k
     *         for row i of Xnew
     * @throws std::invalid_argument if a row of Xnew does not have p columns
     */
    std::vector<std::vector<double>> predict(const std::vector<std::vector<double>>& Xnew) const {
        for (const std::vector<double>& row : Xnew) {
            if (row.size() != static_cast<std::size_t>(p)) {
                throw std::invalid_argument(
                    "QuantileRegression: Xnew must have the same number of columns as X");
            }
        }
        std::vector<std::vector<double>> Ynew(static_cast<std::size_t>(q),
                                              std::vector<double>(Xnew.size(), 0.0));
        for (std::size_t k = 0; k < Ynew.size(); ++k) {
            for (std::size_t i = 0; i < Xnew.size(); ++i) {
                Ynew[k][i] = dot(beta[k], Xnew[i]);
            }
        }
        return Ynew;
    }
};
int main() {
// 生成样本数据
int n = 200;
int p = 3;
int q = 5;
vector<vector<double>> X(n, vector<double>(p));
vector<double> Y(n);
for (int i = 0; i < n; i++) {
X[i][0] = 1;
X[i][1] = rand() % 100 / 100.0;
X[i][2] = rand() % 100 / 100.0;
Y[i] = X[i][0] + X[i][1] + X[i][2] + rand() % 100 / 100.0;
}
vector<double> tau(q);
for (int i = 0; i < q; i++) {
tau[i] = (i+1) * 1.0 / (q+1);
}
// 进行回归
QuantileRegression qr(X, Y, tau);
qr.fit();
// 预测新数据
vector<vector<double>> Xnew(n, vector<double>(p));
for (int i = 0; i < n; i++) {
Xnew[i][0] = 1;
Xnew[i][1] = rand() % 100 / 100.0;
Xnew[i][2] = rand() % 100 / 100.0;
}
vector<vector<double>> Ynew = qr.predict(Xnew);
for (int i = 0; i < n; i++) {
for (int k = 0; k < q; k++) {
cout << "第 " << i+1 << " 个样本的第 " << k+1 << " 个分位数预测值为:" << Ynew[k][i] << endl;
}
}
return 0;
}
```
上述代码的实现中,`QuantileRegression` 类包含以下私有成员变量:`n` 表示样本量,`p` 表示自变量数,`q` 表示分位数数;`X` 表示自变量矩阵,`Y` 表示因变量向量,`tau` 表示分位数向量;`beta` 表示回归系数矩阵。
`QuantileRegression` 类的主要方法有两个:`fit()` 方法用于拟合模型,`predict()` 方法用于预测新数据。
在 `fit()` 方法中,首先对样本进行排序并计算权重,然后对每一个分位数进行回归并求解回归系数。
在 `predict()` 方法中,对于新数据的每个样本和每个分位数,使用回归系数进行预测。
在案例中,我们生成了一个简单的线性模型(设计矩阵共三列:一列常数项和两个随机自变量),对其进行分位数回归,并对新数据进行了预测。
阅读全文