用C++编写代码对地址"C:\Users\86139\Desktop\主成分作业 数据.csv"中的数据进行简单的主成分分析,避免使用第三方库
时间: 2023-09-10 12:04:43 浏览: 90
实现主成分分析的一般步骤如下:
1. 读取数据集
2. 对数据集进行中心化处理
3. 计算协方差矩阵
4. 对协方差矩阵进行特征值分解
5. 选择前k个最大的特征值及其对应的特征向量
6. 将中心化后的数据投影到所选的 k 个特征向量上,得到降维后的数据集
下面是一个简单的 C++ 程序实现:
```cpp
#include <algorithm>
#include <cmath>
#include <exception>
#include <fstream>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// Read a comma-separated file of numbers into a row-major table.
//
// filename: path of the CSV file to read.
// Returns one vector<double> per data line. A file that cannot be opened, or
// that contains no data lines, yields an empty table.
//
// Blank lines are ignored. Lines that precede the first data row and contain a
// non-numeric field are treated as header lines and skipped. Once a data row
// has been accepted, a non-numeric field or a row whose width differs from the
// first data row throws std::invalid_argument: the other routines in this file
// index every row by the width of row 0, so ragged input must not get through.
// Values outside the range of double propagate std::out_of_range from stod.
vector<vector<double>> read_csv(string filename) {
    vector<vector<double>> data;
    ifstream file(filename);
    if (!file) {
        return data;
    }

    string line;
    bool first_line = true;
    while (getline(file, line)) {
        if (first_line) {
            first_line = false;
            // Spreadsheet exports often start with a UTF-8 byte-order mark.
            if (line.compare(0, 3, "\xEF\xBB\xBF") == 0) {
                line.erase(0, 3);
            }
        }
        // A CRLF file read without newline translation keeps the '\r'.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.find_first_not_of(" \t") == string::npos) {
            continue;
        }

        vector<double> row;
        bool numeric = true;
        size_t start = 0;
        for (;;) {
            const size_t comma = line.find(',', start);
            const size_t length = (comma == string::npos) ? string::npos : comma - start;
            const string token = line.substr(start, length);
            try {
                size_t used = 0;
                const double value = stod(token, &used);
                // Only trailing blanks may follow the number inside a field.
                if (token.find_first_not_of(" \t", used) != string::npos) {
                    numeric = false;
                } else {
                    row.push_back(value);
                }
            } catch (const invalid_argument&) {
                numeric = false;
            }
            if (!numeric || comma == string::npos) {
                break;
            }
            start = comma + 1;
        }

        if (!numeric) {
            if (data.empty()) {
                continue;  // still in the header section
            }
            throw invalid_argument("read_csv: non-numeric field in line: " + line);
        }
        if (!data.empty() && row.size() != data.front().size()) {
            throw invalid_argument("read_csv: inconsistent column count in line: " + line);
        }
        data.push_back(row);
    }
    return data;
}
// Compute the arithmetic mean of every column of a row-major table.
//
// data: rows of equal width; the width of row 0 is applied to every row.
// Returns one mean per column, or an empty vector when data has no rows
// (so an empty table no longer dereferences data[0]).
vector<double> mean(vector<vector<double>>& data) {
    if (data.empty()) {
        return {};
    }
    const size_t cols = data[0].size();
    vector<double> avg(cols, 0.0);
    for (const vector<double>& row : data) {
        for (size_t j = 0; j < cols; ++j) {
            avg[j] += row[j];
        }
    }
    const double count = static_cast<double>(data.size());
    for (double& value : avg) {
        value /= count;
    }
    return avg;
}
// Shift every row of data in place by subtracting avg column by column.
//
// data: row-major table; the width of row 0 is applied to every row.
// avg:  per-column offsets, read for indices 0 .. width-1.
void center(vector<vector<double>>& data, vector<double>& avg) {
    if (data.empty()) {
        return;
    }
    const size_t width = data[0].size();
    for (vector<double>& row : data) {
        for (size_t col = 0; col < width; ++col) {
            row[col] -= avg[col];
        }
    }
}
// Compute the sample covariance matrix of a table whose columns have already
// been centred (no mean is subtracted here).
//
// data: n rows by m columns; the width of row 0 is applied to every row.
// Returns the symmetric m x m matrix with entries sum_k x_ki * x_kj / (n - 1).
// An empty table yields an empty matrix. With a single row the sample
// covariance is undefined (the divisor would be zero), so an all-zero matrix
// is returned instead of NaNs.
vector<vector<double>> covariance_matrix(vector<vector<double>>& data) {
    if (data.empty()) {
        return {};
    }
    const size_t n = data.size();
    const size_t m = data[0].size();
    vector<vector<double>> cov(m, vector<double>(m, 0.0));
    if (n < 2) {
        return cov;
    }
    const double denom = static_cast<double>(n - 1);
    for (size_t i = 0; i < m; ++i) {
        // Only the upper triangle is computed; the lower one is mirrored.
        for (size_t j = i; j < m; ++j) {
            double sum = 0.0;
            for (size_t k = 0; k < n; ++k) {
                sum += data[k][i] * data[k][j];
            }
            cov[i][j] = cov[j][i] = sum / denom;
        }
    }
    return cov;
}
// Eigen-decomposition of a real symmetric matrix by the cyclic Jacobi method.
//
// a: square symmetric n x n matrix. Only the upper triangle is read (it is
//    mirrored into the lower one first). The matrix is used as workspace and
//    is overwritten.
// w: output; resized to n and filled with the eigenvalues, largest first.
// v: output; reset to n x n. Column j is the unit-length eigenvector that
//    belongs to w[j].
//
// Each sweep visits every off-diagonal pair (p, q) once and applies the plane
// rotation that zeroes a[p][q]. Sweeps stop when the off-diagonal mass is
// negligible relative to the whole matrix, or after max_sweeps sweeps.
void eig(vector<vector<double>>& a, vector<double>& w, vector<vector<double>>& v) {
    const size_t n = a.size();
    const int max_sweeps = 100;
    const double eps = 1e-12;

    // Start from V = I; assign() also discards anything the caller left in v.
    v.assign(n, vector<double>(n, 0.0));
    for (size_t i = 0; i < n; ++i) {
        v[i][i] = 1.0;
    }

    // Work on the full matrix so rows and columns can be updated uniformly.
    for (size_t i = 0; i < n; ++i) {
        for (size_t j = i + 1; j < n; ++j) {
            a[j][i] = a[i][j];
        }
    }

    for (int sweep = 0; sweep < max_sweeps; ++sweep) {
        double off = 0.0;
        double total = 0.0;
        for (size_t i = 0; i < n; ++i) {
            total += fabs(a[i][i]);
            for (size_t j = i + 1; j < n; ++j) {
                off += fabs(a[i][j]);
            }
        }
        total += 2.0 * off;
        // Relative test; also true for the all-zero matrix (0 <= 0).
        if (off <= eps * total) {
            break;
        }

        for (size_t p = 0; p + 1 < n; ++p) {
            for (size_t q = p + 1; q < n; ++q) {
                const double apq = a[p][q];
                if (apq == 0.0) {
                    continue;
                }
                // t = tan(phi) is the smaller root of t^2 + 2*theta*t - 1 = 0,
                // which keeps the rotation angle at or below 45 degrees.
                const double theta = (a[q][q] - a[p][p]) / (2.0 * apq);
                double t = 1.0 / (fabs(theta) + hypot(theta, 1.0));
                if (theta < 0.0) {
                    t = -t;
                }
                const double c = 1.0 / sqrt(t * t + 1.0);
                const double s = t * c;

                a[p][p] -= t * apq;
                a[q][q] += t * apq;
                a[p][q] = a[q][p] = 0.0;
                for (size_t k = 0; k < n; ++k) {
                    if (k == p || k == q) {
                        continue;
                    }
                    const double akp = a[k][p];
                    const double akq = a[k][q];
                    a[k][p] = a[p][k] = c * akp - s * akq;
                    a[k][q] = a[q][k] = s * akp + c * akq;
                }
                // Accumulate the same rotation into the eigenvector matrix.
                for (size_t k = 0; k < n; ++k) {
                    const double vkp = v[k][p];
                    const double vkq = v[k][q];
                    v[k][p] = c * vkp - s * vkq;
                    v[k][q] = s * vkp + c * vkq;
                }
            }
        }
    }

    // The eigenvalues now sit on the diagonal; hand them back largest first
    // with the eigenvector columns permuted to match.
    vector<size_t> order(n);
    iota(order.begin(), order.end(), size_t{0});
    stable_sort(order.begin(), order.end(),
                [&a](size_t x, size_t y) { return a[x][x] > a[y][y]; });

    w.assign(n, 0.0);
    vector<vector<double>> sorted(n, vector<double>(n, 0.0));
    for (size_t j = 0; j < n; ++j) {
        const size_t src = order[j];
        w[j] = a[src][src];
        for (size_t k = 0; k < n; ++k) {
            sorted[k][j] = v[k][src];
        }
    }
    v.swap(sorted);
}
// Principal component analysis: project the samples onto the k directions of
// largest variance.
//
// data: n samples (rows) by m features (columns). It is centred in place as a
//       side effect, so the caller's table holds mean-subtracted values
//       afterwards.
// k:    number of components to keep; values outside [0, m] are clamped.
// Returns an n x k table (k after clamping) whose column j holds the
// coordinates along the component with the j-th largest eigenvalue. An empty
// data table yields an empty result.
vector<vector<double>> pca(vector<vector<double>>& data, int k) {
    if (data.empty()) {
        return {};
    }
    const size_t features = data[0].size();

    vector<double> avg = mean(data);
    center(data, avg);
    vector<vector<double>> cov = covariance_matrix(data);
    vector<double> w;
    vector<vector<double>> v;
    eig(cov, w, v);

    // Rank the eigenpairs by eigenvalue here so the projection does not depend
    // on the order in which eig() reports them.
    vector<size_t> order(w.size());
    iota(order.begin(), order.end(), size_t{0});
    stable_sort(order.begin(), order.end(),
                [&w](size_t lhs, size_t rhs) { return w[lhs] > w[rhs]; });

    size_t keep = (k <= 0) ? 0 : static_cast<size_t>(k);
    keep = min(keep, min(features, order.size()));

    vector<vector<double>> res(data.size(), vector<double>(keep, 0.0));
    for (size_t i = 0; i < data.size(); ++i) {
        for (size_t j = 0; j < keep; ++j) {
            const size_t component = order[j];
            double projection = 0.0;
            for (size_t l = 0; l < features; ++l) {
                projection += data[i][l] * v[l][component];
            }
            res[i][j] = projection;
        }
    }
    return res;
}
// Entry point: load the CSV, reduce it to two principal components and print
// one projected sample per line, values separated by spaces.
// Returns 0 on success and 1 when the input is unusable.
int main() {
    try {
        vector<vector<double>> data = read_csv("C:\\Users\\86139\\Desktop\\主成分作业 数据.csv");
        // An unreadable or empty file produces an empty table; stop here
        // instead of letting the analysis index into it.
        if (data.empty()) {
            cerr << "No numeric data could be read from the input file." << endl;
            return 1;
        }
        vector<vector<double>> pca_data = pca(data, 2);
        for (const auto& row : pca_data) {
            for (double value : row) {
                cout << value << " ";
            }
            cout << '\n';
        }
    } catch (const exception& e) {
        // Parsing failures surface as exceptions; report them rather than abort.
        cerr << "Error: " << e.what() << endl;
        return 1;
    }
    return 0;
}
```
在上述代码中,`read_csv()` 函数用于从 CSV 文件中读取数据集,`mean()` 函数用于计算每一列的平均值,`center()` 函数用于对数据集进行中心化处理(直接修改传入的数据),`covariance_matrix()` 函数用于计算协方差矩阵,`eig()` 函数用于进行特征值分解,`pca()` 函数依次调用以上函数完成主成分分析。在 `main()` 函数中,我们读取 CSV 文件并进行主成分分析(k = 2,即降到 2 维),得到降维后的数据集并逐行输出。
阅读全文