设计与实现K-Means聚类与K-NN分类算法的基本输入与输出 【输入形式】 键盘输入 第1行:m(样本数), n(特征维数), s(类别数,K-Means算法该项为0) 接下来m行输入样本(每行n列特征;当s>0时每行末尾附加1列类别标签); 接下来1行输入n项特征的名称; 当s>0时,输入一行s项类别的名称; 输入K值 输入未知样本 【输出形式】 给出KNN分类的类别名称 【样例输入】 12 4 2 228 134 20 11 0 245 134 10 40 0 200 167 12 27 0 170 150 7 8 0 100 167 20 14 0 150 117 7 6 1 120 133 10 26 1 160 100 5 10 1 185 115 5 19 1 170 125 6 4 1 165 142 5 3 1 185 108 2 12 1 铜蓝蛋白 蓝色反应 吲哚乙酸 中性硫化物 阳性 阴性 1 100 117 7 2 【样例输出】 阴性 (以下为 C++ 设计代码)
时间: 2024-01-07 11:03:39 浏览: 139
以下是C++代码实现K-Means聚类与K-NN分类算法的基本输入与输出:
```cpp
#include <algorithm>  // sort, min (used by kNN)
#include <cmath>
#include <cstdlib>    // rand (used by kMeans)
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// 样本类,存储样本的特征向量和所属簇的编号
// Sample: one data point — its feature vector plus a cluster/class index.
// For K-Means, `cluster` is the index of the assigned cluster; for K-NN it
// doubles as the index into the class-name list (i.e. the class label).
class Sample {
public:
    vector<double> features;  // n-dimensional feature vector
    int cluster{0};           // brace-init: was uninitialized, relying on callers to set it
};
// 距离计算函数,计算两个样本之间的欧几里得距离
// Euclidean distance between the feature vectors of two samples.
// Assumes both vectors have the same length — TODO confirm all callers
// guarantee this (they do in this file: every sample is read with n features).
double distance(const Sample& s1, const Sample& s2) {
    double sum = 0.0;
    for (size_t i = 0; i < s1.features.size(); i++) {
        const double d = s1.features[i] - s2.features[i];
        sum += d * d;  // d*d instead of pow(d, 2): pow is a general transcendental, far slower
    }
    return sqrt(sum);
}
// K-Means聚类算法
vector<Sample> kMeans(const vector<Sample>& samples, int k) {
// 初始化聚类中心,随机选择k个样本作为聚类中心
vector<Sample> centers;
for (int i = 0; i < k; i++) {
centers.push_back(samples[rand() % samples.size()]);
}
// 迭代聚类过程
bool changed = true;
while (changed) {
changed = false;
// 将样本分配到距离其最近的聚类中心所在的簇中
for (int i = 0; i < samples.size(); i++) {
double min_dist = distance(samples[i], centers[0]);
int min_index = 0;
for (int j = 1; j < centers.size(); j++) {
double dist = distance(samples[i], centers[j]);
if (dist < min_dist) {
min_dist = dist;
min_index = j;
}
}
if (samples[i].cluster != min_index) {
samples[i].cluster = min_index;
changed = true;
}
}
// 重新计算每个簇的聚类中心
for (int i = 0; i < centers.size(); i++) {
vector<double> sum(centers[i].features.size(), 0);
int count = 0;
for (int j = 0; j < samples.size(); j++) {
if (samples[j].cluster == i) {
for (int k = 0; k < samples[j].features.size(); k++) {
sum[k] += samples[j].features[k];
}
count++;
}
}
if (count > 0) {
for (int k = 0; k < sum.size(); k++) {
centers[i].features[k] = sum[k] / count;
}
}
}
}
// 返回聚类结果
return samples;
}
// K-NN分类算法
string kNN(const vector<Sample>& samples, const vector<string>& feature_names, const vector<string>& class_names, int k, const Sample& unknown_sample) {
// 计算未知样本与每个训练样本之间的距离
vector<pair<double, int>> distances;
for (int i = 0; i < samples.size(); i++) {
double dist = distance(samples[i], unknown_sample);
distances.push_back(make_pair(dist, i));
}
// 对距离进行排序,选择前k个最近的样本
sort(distances.begin(), distances.end());
// 统计前k个最近的样本所属类别的数量
map<string, int> class_counts;
for (int i = 0; i < k; i++) {
int index = distances[i].second;
string class_name = class_names[samples[index].cluster];
if (class_counts.find(class_name) == class_counts.end()) {
class_counts[class_name] = 1;
} else {
class_counts[class_name]++;
}
}
// 找到数量最多的类别
string max_class_name = "";
int max_count = 0;
for (auto it = class_counts.begin(); it != class_counts.end(); it++) {
if (it->second > max_count) {
max_count = it->second;
max_class_name = it->first;
}
}
// 返回分类结果
return max_class_name;
}
int main() {
// 读入数据
int m, n, s;
cin >> m >> n >> s;
vector<Sample> samples(m);
for (int i = 0; i < m; i++) {
samples[i].features.resize(n);
samples[i].cluster = 0;
for (int j = 0; j < n; j++) {
cin >> samples[i].features[j];
}
}
vector<string> feature_names(n);
for (int i = 0; i < n; i++) {
cin >> feature_names[i];
}
vector<string> class_names;
if (s > 0) {
class_names.resize(s);
for (int i = 0; i < s; i++) {
cin >> class_names[i];
}
}
int k;
cin >> k;
Sample unknown_sample;
unknown_sample.features.resize(n);
for (int i = 0; i < n; i++) {
cin >> unknown_sample.features[i];
}
// K-Means聚类
if (s == 0) {
vector<Sample> clusters = kMeans(samples, k);
for (int i = 0; i < clusters.size(); i++) {
for (int j = 0; j < n; j++) {
cout << clusters[i].features[j];
if (j < n - 1) {
cout << " ";
}
}
cout << " " << clusters[i].cluster << endl;
}
}
// K-NN分类
else {
string class_name = kNN(samples, feature_names, class_names, k, unknown_sample);
cout << class_name << endl;
}
return 0;
}
```
注意,此代码实现了K-Means聚类和K-NN分类算法的基本功能,但可能需要根据具体需求进行调整和优化。
阅读全文