Please solve this programming problem in C++.
The following is C++ code implementing the K-Means clustering and K-NN classification algorithms:
```c++
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <cmath>
#include <algorithm>
#include <limits>
#include <random>
using namespace std;
// Feature vector
class FeatureVector {
public:
    int classLabel = 0;            // class label
    vector<double> attributes;     // attribute values

    // Euclidean distance between this vector and another
    double euclideanDistance(const FeatureVector& fv) const {
        double dist = 0.0;
        for (size_t i = 0; i < fv.attributes.size(); i++) {
            dist += pow(attributes[i] - fv.attributes[i], 2);
        }
        return sqrt(dist);
    }
};
// K-Means clustering
class KMeans {
public:
    KMeans(int k, int maxIter) {
        this->k = k;
        this->maxIter = maxIter;
    }

    // Load the data set
    void input(const vector<FeatureVector>& data) {
        this->data = data;
    }

    // Run the clustering; the resulting centroids are returned via `centroids`
    void cluster(vector<FeatureVector>& centroids) {
        // Pick k random samples as the initial centroids
        int n = data.size();
        vector<int> indices(n);
        for (int i = 0; i < n; i++) {
            indices[i] = i;
        }
        mt19937 rng(random_device{}());
        shuffle(indices.begin(), indices.end(), rng);
        for (int i = 0; i < k; i++) {
            centroids.push_back(data[indices[i]]);
        }
        // Iterative refinement
        int iter = 0;
        while (iter < maxIter) {
            // Assign every sample to the cluster of its nearest centroid
            vector<vector<FeatureVector> > clusters(k);
            for (int i = 0; i < n; i++) {
                FeatureVector& fv = data[i];
                int nearestCentroid = -1;
                double minDistance = numeric_limits<double>::max();
                for (int j = 0; j < k; j++) {
                    double distance = fv.euclideanDistance(centroids[j]);
                    if (distance < minDistance) {
                        minDistance = distance;
                        nearestCentroid = j;
                    }
                }
                clusters[nearestCentroid].push_back(fv);
            }
            // Recompute the centroid of each cluster
            vector<FeatureVector> newCentroids(k);
            for (int i = 0; i < k; i++) {
                // An empty cluster keeps its previous centroid
                if (clusters[i].empty()) {
                    newCentroids[i] = centroids[i];
                    continue;
                }
                newCentroids[i].classLabel = centroids[i].classLabel;
                newCentroids[i].attributes.assign(centroids[i].attributes.size(), 0.0);
                int m = clusters[i].size();
                for (int j = 0; j < m; j++) {
                    FeatureVector& fv = clusters[i][j];
                    for (size_t l = 0; l < fv.attributes.size(); l++) {
                        newCentroids[i].attributes[l] += fv.attributes[l];
                    }
                }
                for (size_t l = 0; l < newCentroids[i].attributes.size(); l++) {
                    newCentroids[i].attributes[l] /= m;
                }
            }
            // Stop once every centroid has (almost) stopped moving
            bool converged = true;
            for (int i = 0; i < k; i++) {
                if (centroids[i].euclideanDistance(newCentroids[i]) > 1e-6) {
                    converged = false;
                    break;
                }
            }
            if (converged) {
                break;
            }
            centroids = newCentroids;
            iter++;
        }
    }
private:
    int k;                        // number of clusters
    int maxIter;                  // maximum number of iterations
    vector<FeatureVector> data;   // data set
};
// K-NN classification
class KNN {
public:
    KNN(int k) {
        this->k = k;
    }

    // Load the reference samples
    void input(const vector<FeatureVector>& data) {
        this->data = data;
    }

    // Classify a single feature vector
    int classify(const FeatureVector& fv) {
        // Distance from fv to every reference sample, paired with that sample's label
        vector<pair<double, int> > distances;
        for (size_t i = 0; i < data.size(); i++) {
            double distance = fv.euclideanDistance(data[i]);
            distances.push_back(make_pair(distance, data[i].classLabel));
        }
        // Take the k nearest samples and count the votes per label
        sort(distances.begin(), distances.end());
        vector<int> votes;
        for (int i = 0; i < k && i < (int)distances.size(); i++) {
            int label = distances[i].second;
            if (label >= (int)votes.size()) {
                votes.resize(label + 1, 0);
            }
            votes[label]++;
        }
        // Return the label with the most votes
        return (int)(max_element(votes.begin(), votes.end()) - votes.begin());
    }
private:
    int k;                        // number of neighbours
    vector<FeatureVector> data;   // reference samples
};
int main() {
    // Read the data set: m samples, n attributes each, s classes
    int m, n, s;
    cin >> m >> n >> s;
    vector<FeatureVector> data(m);
    for (int i = 0; i < m; i++) {
        data[i].attributes.resize(n);
        for (int j = 0; j < n; j++) {
            cin >> data[i].attributes[j];
        }
        // A label follows each sample only when the data set is labelled (s > 0)
        if (s > 0) {
            cin >> data[i].classLabel;
        }
    }
    // Read the attribute names and the class names
    vector<string> featureNames(n);
    for (int j = 0; j < n; j++) {
        cin >> featureNames[j];
    }
    vector<string> classLabels(s);
    for (int i = 0; i < s; i++) {
        cin >> classLabels[i];
    }
    // K-Means clustering with one cluster per class
    vector<FeatureVector> centroids;
    KMeans kmeans(s, 100);
    kmeans.input(data);
    kmeans.cluster(centroids);
    // K-NN classification of a query vector against the centroid set
    int k;
    cin >> k;
    FeatureVector fv;
    fv.attributes.resize(n);
    for (int j = 0; j < n; j++) {
        cin >> fv.attributes[j];
    }
    KNN knn(k);
    knn.input(centroids);
    int classLabel = knn.classify(fv);
    // Print the predicted class name
    cout << classLabels[classLabel] << endl;
    return 0;
}
```
Note: when the data set is read in, s > 0 means each sample is followed by a class label; otherwise the samples are unlabelled. For the K-Means step the number of clusters is set equal to the number of classes s, and the K-NN step then classifies the query vector against the centroid set produced by K-Means.
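The original problem statement is not shown above, so the exact input format is an assumption based on how `main()` reads its input. A made-up example with m = 4 samples, n = 2 attributes, and s = 2 classes might look like this (each sample row ends with a label because s > 0):

```
4 2 2
1.0 1.0 0
1.2 0.8 0
8.0 8.5 1
7.9 8.1 1
x y
classA classB
1
7.5 8.0
```

With k = 1, the program would find the single centroid nearest to the query point (7.5, 8.0) and print the corresponding class name.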