使用C++生成Kmeans代码实现以下要求: 【问题描述】 设计与实现K-Means聚类与K-NN分类算法的基本输入与输出 【输入形式】 键盘输入 第1行:m(样本数), n(特征维数), s(类别数,K-Means算法该项为0) 接下来m行(每行n列)输入样本; 接下来1行输入n项特征的名称; 当s>0时,输入一行s项类别的名称; 输入K值 输入未知样本 【输出形式】 给出KNN分类的类别名称 【样例输入】 12 4 2 228 134 20 11 0 245 134 10 40 0 200 167 12 27 0 170 150 7 8 0 100 167 20 14 0 150 117 7 6 1 120 133 10 26 1 160 100 5 10 1 185 115 5 19 1 170 125 6 4 1 165 142 5 3 1 185 108 2 12 1 铜蓝蛋白 蓝色反应 吲哚乙酸 中性硫化物 阳性 阴性 1 100 117 7 2 【样例输出】 阴性
时间: 2024-01-01 09:05:07 浏览: 73
好的,这里是使用C++编写K-Means聚类与K-NN分类算法的代码,实现您的需求:
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// A single observation: a numeric feature vector together with a class label.
// The label may be an empty string (e.g. for a sample whose class is unknown).
class Sample {
public:
    std::vector<double> features;  // feature vector
    std::string label;             // class label

    // Builds a sample by copying `f` and `l`.
    // Both arguments are taken by const reference so that const objects and
    // temporaries can be passed, not only mutable lvalues.
    Sample(const std::vector<double>& f, const std::string& l) : features(f), label(l) {}
};
// Computes the Euclidean distance between two feature vectors.
//
// Parameters:
//   v1, v2 - the vectors to compare (not modified).
// Returns:
//   sqrt(sum_i (v1[i] - v2[i])^2), summed over the dimensions both vectors
//   have. If the lengths differ, only the common prefix is compared, so the
//   function never reads past the end of the shorter vector.
double euclidean_distance(const std::vector<double>& v1, const std::vector<double>& v2) {
    const std::size_t dims = std::min(v1.size(), v2.size());
    double sum_sq = 0.0;
    for (std::size_t i = 0; i < dims; ++i) {
        const double diff = v1[i] - v2[i];
        sum_sq += diff * diff;  // plain multiply instead of the general pow()
    }
    return std::sqrt(sum_sq);
}
// K-Means聚类算法
vector<vector<Sample>> k_means(vector<Sample>& samples, int k) {
vector<vector<Sample>> clusters(k); // 存储每个簇
vector<vector<double>> centroids(k); // 存储每个簇的质心
// 随机初始化质心
for (int i = 0; i < k; i++) {
centroids[i] = samples[rand() % samples.size()].features;
}
int iter = 0;
while (iter < 100) { // 最大迭代次数
// 清空原来的簇
for (int i = 0; i < k; i++) {
clusters[i].clear();
}
// 将每个样本分配到最近的簇
for (auto& s : samples) {
int cluster_idx = -1;
double min_distance = 1e9;
for (int i = 0; i < k; i++) {
double distance = euclidean_distance(s.features, centroids[i]);
if (distance < min_distance) {
min_distance = distance;
cluster_idx = i;
}
}
clusters[cluster_idx].push_back(s);
}
// 计算每个簇的新质心
bool terminate = true;
for (int i = 0; i < k; i++) {
vector<double> new_centroid(centroids[i].size(), 0);
for (auto& s : clusters[i]) {
for (int j = 0; j < s.features.size(); j++) {
new_centroid[j] += s.features[j];
}
}
if (clusters[i].size() > 0) {
for (int j = 0; j < new_centroid.size(); j++) {
new_centroid[j] /= clusters[i].size();
}
}
if (new_centroid != centroids[i]) {
terminate = false;
}
centroids[i] = new_centroid;
}
if (terminate) {
break;
}
iter++;
}
return clusters;
}
// K-NN分类算法
string k_nn(vector<Sample>& samples, Sample& unknown, int k) {
vector<pair<double, string>> distances; // 存储每个样本到未知样本的距离和标签
for (auto& s : samples) {
double distance = euclidean_distance(s.features, unknown.features);
distances.push_back({distance, s.label});
}
// 选取距离最近的K个样本
sort(distances.begin(), distances.end());
int positive_count = 0, negative_count = 0;
for (int i = 0; i < k; i++) {
if (distances[i].second == "阳性") {
positive_count++;
} else if (distances[i].second == "阴性") {
negative_count++;
}
}
// 统计K个样本所属的类别
if (positive_count > negative_count) {
return "阳性";
} else {
return "阴性";
}
}
int main() {
int m, n, s, k;
cin >> m >> n >> s;
// 读入样本
vector<Sample> samples;
for (int i = 0; i < m; i++) {
vector<double> features(n);
for (int j = 0; j < n; j++) {
cin >> features[j];
}
string label;
cin >> label;
samples.push_back(Sample(features, label));
}
// 读入特征名称和标签名称
vector<string> feature_names(n);
for (int i = 0; i < n; i++) {
cin >> feature_names[i];
}
vector<string> label_names(s);
if (s > 0) {
for (int i = 0; i < s; i++) {
cin >> label_names[i];
}
}
// 读入K值和未知样本
cin >> k;
vector<double> unknown_features(n);
for (int i = 0; i < n; i++) {
cin >> unknown_features[i];
}
Sample unknown(unknown_features, "");
// K-Means聚类
vector<vector<Sample>> clusters = k_means(samples, k);
// 输出每个簇的样本数和标签
for (int i = 0; i < k; i++) {
cout << "Cluster " << i << " (" << clusters[i].size() << " samples): ";
for (auto& s : clusters[i]) {
cout << s.label << " ";
}
cout << endl;
}
// K-NN分类
string pred_label = k_nn(samples, unknown, k);
cout << "Unknown sample: ";
for (int i = 0; i < n; i++) {
cout << feature_names[i] << ": " << unknown.features[i] << " ";
}
cout << endl << "Predicted label: " << pred_label << endl;
return 0;
}
您可以将以上代码保存到一个文件中(例如k_means_knn.cpp),然后通过命令行编译并运行:
$ g++ k_means_knn.cpp -o k_means_knn
$ ./k_means_knn
然后按照题目要求输入数据,程序会输出每个簇的样本数和标签,以及K-NN分类的结果。
相关推荐


















