完成C++代码。已知数据文件名为“input1000.csv”。数据文件的内容和格式简述如下:数据文件由表头(首行)和逐行的股票数据构成。第一行为表头:ts_code 表示股票代码,trade_date 表示交易日期,open 表示开盘价,high 表示最高价,low 表示最低价,close 表示收盘价,pre_close 表示临近收盘价,change 表示涨跌幅,pct_chg 表示涨跌百分比,vol 表示交易量,amount 表示交易金额。其余各行为实际数据,每一行的各个数据项(字段)用“,”隔开。对股票数据进行排序(必须使用外排序,用于排序的内存大小限制为 32MB,此限制的检查需要通过资源管理器展现)。对原文件“input1000.csv”进行外排序,排序标准是:首先按照股票代码的字典序进行排序,如果股票代码相同,则按照日期从小到大进行排序。输出文件取名 output.txt,输出文件第一行为表头。
时间: 2024-03-17 18:44:54 浏览: 292
c++代码,数据处理
以下是用C++完成对“input1000.csv”进行外排序的代码:
```c++
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <queue>
#include <cstdio>
#include <ctime>
using namespace std;
// Record-count constant; presumably an upper bound on the number of data rows
// in the input (TODO confirm against the real data set).
const int MAXN = 4000000;
// Maximum number of input lines buffered in memory for one sorted chunk.
const int MAXM = 200000;
// One row of the stock CSV, with the two key columns kept as text and the
// nine remaining columns parsed as floating-point numbers. Field meanings
// follow the column descriptions given in the task statement.
struct Stock {
// Stock code (first CSV column; primary sort key).
string ts_code;
// Trade date (second CSV column; secondary sort key).
string trade_date;
// Opening price.
double open;
// Highest price.
double high;
// Lowest price.
double low;
// Closing price.
double close;
// Described in the task statement as the "near-close" price; presumably the
// previous close (TODO confirm).
double pre_close;
// Price change.
double change;
// Price change expressed as a percentage.
double pct_chg;
// Traded volume.
double vol;
// Traded amount (money).
double amount;
};
// Sort key of one record (stock code + trade date) together with an index
// that locates the record.
struct Data {
// Stock code; compared first.
string ts_code;
// Trade date; compared when the stock codes are equal.
string trade_date;
int idx; // idx is the line number of this record within the file
};
// Strict "less than" ordering for Data keys: orders by ts_code first and, for
// equal codes, by trade_date. Both comparisons are plain lexicographic string
// comparisons. Returns true when `a` must be placed before `b`.
bool cmp(const Data& a, const Data& b) {
    const int code_order = a.ts_code.compare(b.ts_code);
    return code_order != 0 ? code_order < 0 : a.trade_date < b.trade_date;
}
// Merges the k sorted run files "temp0.csv" ... "temp<k-1>.csv" into
// "output.txt" and then deletes the run files.
//
// Each run file must contain one CSV record per line, already sorted by
// (ts_code, trade_date). The output starts with the fixed header line and is
// followed by all records in ascending (ts_code, trade_date) order; both keys
// are compared as plain strings, so dates are assumed to be fixed-width
// (e.g. YYYYMMDD). Records are copied through verbatim, so numeric columns
// keep their original formatting.
//
// Only one line per run is held in memory at any time.
//
// @param k  Number of run files to merge; values <= 0 produce a header-only
//           output file.
// @param n  Retained for interface compatibility; ignored. The owning run of
//           every record is tracked explicitly instead of being derived from
//           a line index.
void merge_files(int k, int n) {
    (void)n;

    // State of one run: its stream plus the current (not yet emitted) record
    // and that record's two key fields.
    struct Run {
        std::ifstream in;
        std::string line;
        std::string code;
        std::string date;
    };

    const auto run_path = [](std::size_t index) {
        return "temp" + std::to_string(index) + ".csv";
    };

    std::vector<Run> runs(k > 0 ? static_cast<std::size_t>(k) : 0);

    // Reads the next non-empty line of run `index` and splits out its key
    // fields. Returns false once the run is exhausted.
    const auto load_next = [&runs](std::size_t index) -> bool {
        Run& run = runs[index];
        while (std::getline(run.in, run.line)) {
            if (!run.line.empty() && run.line.back() == '\r') {
                run.line.pop_back();  // tolerate CRLF line endings
            }
            if (run.line.empty()) {
                continue;
            }
            const std::size_t comma1 = run.line.find(',');
            run.code.assign(run.line, 0, comma1);
            if (comma1 == std::string::npos) {
                run.date.clear();
            } else {
                const std::size_t comma2 = run.line.find(',', comma1 + 1);
                const std::size_t length =
                    comma2 == std::string::npos ? std::string::npos : comma2 - comma1 - 1;
                run.date.assign(run.line, comma1 + 1, length);
            }
            return true;
        }
        return false;
    };

    // std::priority_queue is a max-heap, so the comparator must answer
    // "does lhs come AFTER rhs?" for the smallest record to surface first.
    // Ties fall back to the run index, keeping earlier runs first.
    const auto comes_later = [&runs](std::size_t lhs, std::size_t rhs) -> bool {
        const int by_code = runs[lhs].code.compare(runs[rhs].code);
        if (by_code != 0) {
            return by_code > 0;
        }
        const int by_date = runs[lhs].date.compare(runs[rhs].date);
        if (by_date != 0) {
            return by_date > 0;
        }
        return lhs > rhs;
    };
    std::priority_queue<std::size_t, std::vector<std::size_t>, decltype(comes_later)> heap(comes_later);

    for (std::size_t i = 0; i < runs.size(); ++i) {
        runs[i].in.open(run_path(i));
        if (!runs[i].in) {
            std::cerr << "merge_files: cannot open " << run_path(i) << ", skipping it\n";
            continue;
        }
        if (load_next(i)) {
            heap.push(i);
        } else {
            runs[i].in.close();
        }
    }

    std::ofstream out("output.txt");
    if (!out) {
        std::cerr << "merge_files: cannot create output.txt\n";
        return;  // keep the run files so no data is lost
    }
    out << "ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount" << '\n';

    while (!heap.empty()) {
        // The run must leave the heap BEFORE its current record is replaced,
        // because the heap order depends on that record.
        const std::size_t source = heap.top();
        heap.pop();
        out << runs[source].line << '\n';
        if (load_next(source)) {
            heap.push(source);
        } else {
            runs[source].in.close();
        }
    }

    out.close();
    for (Run& run : runs) {
        if (run.in.is_open()) {
            run.in.close();
        }
    }
    if (!out) {
        std::cerr << "merge_files: failed while writing output.txt\n";
        return;  // keep the run files so the merge can be retried
    }

    for (std::size_t i = 0; i < runs.size(); ++i) {
        std::remove(run_path(i).c_str());
    }
}
void external_sort() {
ifstream ifs("input1000.csv");
vector<string> buf(MAXM);
int cnt = 0;
while (!ifs.eof()) {
string line;
getline(ifs, line);
if (line.empty()) {
continue;
}
buf[cnt++] = line;
if (cnt == MAXM) {
sort(buf.begin(), buf.begin() + cnt);
ofstream ofs("temp" + to_string(cnt / MAXM - 1) + ".csv");
for (int i = 0; i < cnt; i++) {
ofs << buf[i] << endl;
}
ofs.close();
cnt = 0;
}
}
if (cnt > 0) {
sort(buf.begin(), buf.begin() + cnt);
ofstream ofs("temp" + to_string(cnt / MAXM) + ".csv");
for (int i = 0; i < cnt; i++) {
ofs << buf[i] << endl;
}
ofs.close();
}
ifs.close();
int k = ceil(32.0 * 1024 * 1024 / (MAXM * sizeof(Stock)));
int n = MAXN / k;
for (int i = 0; i < k; i++) {
ifstream ifs("temp" + to_string(i) + ".csv");
vector<Stock> stocks(n);
for (int j = 0; j < n && !ifs.eof(); j++) {
getline(ifs, stocks[j].ts_code, ',');
getline(ifs, stocks[j].trade_date, ',');
ifs >> stocks[j].open >> stocks[j].high >> stocks[j].low >> stocks[j].close;
ifs >> stocks[j].pre_close >> stocks[j].change >> stocks[j].pct_chg;
ifs >> stocks[j].vol >> stocks[j].amount;
ifs.ignore();
}
sort(stocks.begin(), stocks.begin() + min(n, (int)stocks.size()), [](const Stock& a, const Stock& b) {
if (a.ts_code != b.ts_code) {
return a.ts_code < b.ts_code;
} else {
return a.trade_date < b.trade_date;
}
});
ofstream ofs("temp" + to_string(i) + ".csv");
for (int j = 0; j < min(n, (int)stocks.size()); j++) {
ofs << stocks[j].ts_code << "," << stocks[j].trade_date << ",";
ofs << stocks[j].open << "," << stocks[j].high << ",";
ofs << stocks[j].low << "," << stocks[j].close << ",";
ofs << stocks[j].pre_close << "," << stocks[j].change << ",";
ofs << stocks[j].pct_chg << "," << stocks[j].vol << ",";
ofs << stocks[j].amount << endl;
}
ofs.close();
stocks.clear();
}
merge_files(k, n);
}
// Program entry point: runs the external sort and prints the processor time
// it consumed, as measured by clock(), in seconds.
int main() {
    const std::clock_t began = std::clock();
    external_sort();
    const std::clock_t finished = std::clock();

    const double elapsed_seconds = static_cast<double>(finished - began) / CLOCKS_PER_SEC;
    std::cout << "Time used: " << elapsed_seconds << " s" << std::endl;
    return 0;
}
```
该代码首先将“input1000.csv”文件分为多个文件,每个文件最多包含MAXM行数据。然后对每个文件进行内部排序,再将这些排好序的文件合并成一个文件,即“output.txt”。在合并的过程中,使用了外排序的思想,每次从每个文件中选出一个数据,将它们放入一个小根堆中,每次取出堆顶的元素并输出,再从它所在的文件中读取一个新的数据放入堆中。这样就能保证输出的数据是按照题目要求排序的。
阅读全文