用C++实现PCA算法
时间: 2023-08-04 12:11:01 浏览: 49
PCA(Principal Component Analysis,主成分分析)是一种将高维数据降维到低维的算法,它可以去除数据中的冗余信息,提高数据处理的效率。下面是使用C++实现PCA算法的示例代码:
```c++
#include <algorithm>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>
using namespace std;
/// Computes the per-column arithmetic mean of a data matrix.
///
/// @param data  Samples stored row by row: data[i][j] is column j of row i.
///              The column count is taken from the first row; every row must
///              have at least that many entries.
/// @return      One mean per column, or an empty vector when `data` has no
///              rows.
std::vector<double> getMeanVector(const std::vector<std::vector<double>> &data) {
    // Without this guard, reading the first row of an empty matrix would be
    // undefined behaviour.
    if (data.empty()) {
        return {};
    }

    const std::size_t row = data.size();
    const std::size_t col = data.front().size();

    // Accumulate column sums first, then divide once per column.
    std::vector<double> mean(col, 0.0);
    for (const auto &sample : data) {
        for (std::size_t j = 0; j < col; ++j) {
            mean[j] += sample[j];
        }
    }
    for (double &value : mean) {
        value /= static_cast<double>(row);
    }
    return mean;
}
// 计算协方差矩阵
vector<vector<double>> getCovarianceMatrix(const vector<vector<double>> &data) {
int row = data.size();
int col = data[0].size();
vector<double> mean = getMeanVector(data);
vector<vector<double>> covariance(col, vector<double>(col, 0.0));
for (int i = 0; i < col; i++) {
for (int j = i; j < col; j++) {
double cov = 0.0;
for (int k = 0; k < row; k++) {
cov += (data[k][i] - mean[i]) * (data[k][j] - mean[j]);
}
covariance[i][j] = cov / (row - 1);
covariance[j][i] = cov / (row - 1);
}
}
return covariance;
}
/// Computes the eigenvalues and eigenvectors of a real symmetric matrix using
/// the classical Jacobi eigenvalue algorithm (repeated plane rotations that
/// zero the largest off-diagonal element).
///
/// @param covariance    Square symmetric input matrix. It is not modified; the
///                      rotations are applied to an internal copy.
/// @param eigenValues   Output: eigenvalues sorted in descending order.
/// @param eigenVectors  Output: matrix whose column j is the unit eigenvector
///                      belonging to eigenValues[j]. Any previous contents are
///                      discarded.
///
/// Iteration stops when the largest off-diagonal magnitude drops to a small
/// fraction of the largest input entry, or when the rotation budget is spent.
void getEigenValuesAndVectors(const std::vector<std::vector<double>> &covariance, std::vector<double> &eigenValues, std::vector<std::vector<double>> &eigenVectors) {
    const int size = static_cast<int>(covariance.size());

    // Working copy: the input is a const reference and must stay untouched.
    std::vector<std::vector<double>> a(covariance);

    // The accumulated rotation starts as the identity matrix. assign() (rather
    // than resize()) also resets rows that already existed in the output.
    eigenVectors.assign(size, std::vector<double>(size, 0.0));
    for (int i = 0; i < size; ++i) {
        eigenVectors[i][i] = 1.0;
    }
    eigenValues.assign(size, 0.0);

    // Relative convergence threshold; an exact comparison with 0.0 is rarely
    // reached in floating point.
    double scale = 0.0;
    for (const auto &matrixRow : a) {
        for (const double entry : matrixRow) {
            scale = std::max(scale, std::fabs(entry));
        }
    }
    const double tolerance = 1e-12 * scale;

    // Keep the historical floor of 1000 rotations, but let the budget grow
    // with the number of off-diagonal entries for larger matrices.
    const long maxRotations = std::max(1000L, 50L * size * size);

    for (long iteration = 0; iteration < maxRotations; ++iteration) {
        // Pivot: the off-diagonal element with the largest magnitude.
        double largest = 0.0;
        int p = 0;
        int q = 0;
        for (int j = 0; j < size; ++j) {
            for (int k = j + 1; k < size; ++k) {
                if (std::fabs(a[j][k]) > largest) {
                    largest = std::fabs(a[j][k]);
                    p = j;
                    q = k;
                }
            }
        }
        if (largest <= tolerance) {
            break;
        }

        // Rotation J with J[p][p] = J[q][q] = c, J[q][p] = s, J[p][q] = -s.
        // This angle makes entry (p, q) of J^T * A * J vanish.
        const double theta = 0.5 * std::atan2(2.0 * a[p][q], a[p][p] - a[q][q]);
        const double s = std::sin(theta);
        const double c = std::cos(theta);

        // Apply the similarity transform A <- J^T * A * J.
        const double app = a[p][p];
        const double aqq = a[q][q];
        const double apq = a[p][q];
        a[p][p] = c * c * app + 2.0 * c * s * apq + s * s * aqq;
        a[q][q] = s * s * app - 2.0 * c * s * apq + c * c * aqq;
        a[p][q] = 0.0;
        a[q][p] = 0.0;

        for (int k = 0; k < size; ++k) {
            if (k != p && k != q) {
                const double akp = a[k][p];
                const double akq = a[k][q];
                const double newKp = c * akp + s * akq;
                const double newKq = -s * akp + c * akq;
                // Write both triangles so the working copy stays symmetric.
                a[k][p] = newKp;
                a[p][k] = newKp;
                a[k][q] = newKq;
                a[q][k] = newKq;
            }

            // Accumulate the rotation: V <- V * J.
            const double vkp = eigenVectors[k][p];
            const double vkq = eigenVectors[k][q];
            eigenVectors[k][p] = c * vkp + s * vkq;
            eigenVectors[k][q] = -s * vkp + c * vkq;
        }
    }

    // Order eigenpairs by descending eigenvalue so that the leading columns
    // are the principal directions.
    std::vector<int> order(size);
    std::iota(order.begin(), order.end(), 0);
    std::stable_sort(order.begin(), order.end(), [&a](int lhs, int rhs) {
        return a[lhs][lhs] > a[rhs][rhs];
    });

    std::vector<std::vector<double>> sortedVectors(size, std::vector<double>(size, 0.0));
    for (int j = 0; j < size; ++j) {
        const int source = order[j];
        eigenValues[j] = a[source][source];
        for (int i = 0; i < size; ++i) {
            sortedVectors[i][j] = eigenVectors[i][source];
        }
    }
    eigenVectors.swap(sortedVectors);
}
/// Projects each data row onto the first k columns of `eigenVectors`.
///
/// @param data          Samples stored row by row; the column count is taken
///                      from the first row.
/// @param eigenVectors  Matrix whose columns are the projection directions.
///                      It must have at least as many rows as `data` has
///                      columns.
/// @param k             Requested number of output dimensions. Values below
///                      zero are treated as zero, and values larger than the
///                      number of columns in `eigenVectors` are clamped to it,
///                      so the function never reads past the matrix.
/// @return              A matrix with one row per sample and one column per
///                      kept direction; empty when `data` has no rows.
///
/// The rows are projected as given: no mean is subtracted here.
std::vector<std::vector<double>> getReducedData(const std::vector<std::vector<double>> &data, const std::vector<std::vector<double>> &eigenVectors, int k) {
    // Reading the first row of an empty matrix would be undefined behaviour.
    if (data.empty()) {
        return {};
    }

    const std::size_t row = data.size();
    const std::size_t col = data.front().size();

    // Clamp k into [0, number of available directions]. A negative k would
    // otherwise convert to an enormous unsigned size.
    const std::size_t available = eigenVectors.empty() ? 0 : eigenVectors.front().size();
    std::size_t components = k > 0 ? static_cast<std::size_t>(k) : 0;
    if (components > available) {
        components = available;
    }

    std::vector<std::vector<double>> reducedData(row, std::vector<double>(components, 0.0));
    for (std::size_t i = 0; i < row; ++i) {
        for (std::size_t j = 0; j < components; ++j) {
            // Dot product of sample i with direction j.
            double projection = 0.0;
            for (std::size_t l = 0; l < col; ++l) {
                projection += data[i][l] * eigenVectors[l][j];
            }
            reducedData[i][j] = projection;
        }
    }
    return reducedData;
}
int main() {
vector<vector<double>> data = {{2, 3, 4}, {1, 2, 3}, {4, 5, 6}, {3, 4, 5}};
vector<vector<double>> covariance = getCovarianceMatrix(data);
vector<double> eigenValues;
vector<vector<double>> eigenVectors;
getEigenValuesAndVectors(covariance, eigenValues, eigenVectors);
vector<vector<double>> reducedData = getReducedData(data, eigenVectors, 2);
for (int i = 0; i < reducedData.size(); i++) {
for (int j = 0; j < reducedData[0].size(); j++) {
cout << reducedData[i][j] << " ";
}
cout << endl;
}
return 0;
}
```
在上面的示例代码中,输入数据是一个4行3列的矩阵,我们通过`getReducedData`函数将其降维到2维。其中,`getMeanVector`函数用于计算平均向量,`getCovarianceMatrix`函数用于计算协方差矩阵,`getEigenValuesAndVectors`函数用于计算特征值和特征向量,`getReducedData`函数用于计算降维后的数据。在`getEigenValuesAndVectors`函数中,我们使用Jacobi旋转法(即Jacobi特征值算法,注意它不同于求解线性方程组的Jacobi迭代法)来求解对称矩阵的特征值和特征向量。