利用哈夫曼压缩与解压缩文件的C++代码
时间: 2023-12-10 18:04:11 浏览: 134
哈夫曼编码是一种可变长度编码：它将出现频率较高的字符用较短的编码表示，出现频率较低的字符用较长的编码表示，从而达到压缩数据的目的。下面是利用哈夫曼编码进行文件压缩与解压缩的 C++ 代码。
压缩代码:
```c++
#include <iostream>
#include <fstream>
#include <queue>
#include <vector>
#include <map>
#include <bitset>
#include <cstring>
using namespace std;
// Capacity of the node pool. A byte-oriented Huffman tree never needs more
// than 2 * 256 - 1 nodes; the original (larger) capacity is kept unchanged.
const int MAXN = 1000005;

// One Huffman tree node stored in the global pool `huff`.
//   cnt  - total frequency of all bytes below this node
//   lson - pool index of the child reached by bit '0' (0 for a leaf)
//   rson - pool index of the child reached by bit '1' (0 for a leaf)
// Pool index 0 is never used as a node, so 0 can mean "no child".
struct Node {
    int cnt, lson, rson;
} huff[MAXN];

// Priority-queue entry describing the root of one partial tree.
struct Char {
    unsigned char ch;  // byte value; only meaningful for a leaf entry
    int freq;          // weight of the subtree
    int node;          // index of the subtree root in `huff`

    // std::priority_queue is a max-heap, so the comparison is inverted to pop
    // the lightest subtree first. Equal weights are ordered by node index so
    // that the resulting tree is fully deterministic: a decoder that rebuilds
    // the tree from the same frequency table must obtain the same codes.
    bool operator < (const Char& c) const {
        if (freq != c.freq) {
            return freq > c.freq;
        }
        return node > c.node;
    }
};

// Input of huff_build(): number of occurrences of every byte value seen.
std::map<unsigned char, int> freq;
// Output of huff_build(): the code of every byte as a string of '0'/'1'.
std::map<unsigned char, std::string> code;

// Builds the Huffman tree for `freq` in `huff` and fills `code`.
//
// Layout produced in `huff`: leaves occupy indices 1..n in ascending byte
// order (the iteration order of `freq`); every merge appends one internal
// node, so the root is the last node written (index 2n - 1).
//
// Special cases: an empty `freq` leaves `code` empty; a single distinct byte
// gets the one-bit code "0" so that every occurrence still emits a bit.
//
// Note: weights are `int`, so inputs whose total size exceeds INT_MAX bytes
// are not supported.
void huff_build() {
    code.clear();

    unsigned char symbol_of[2 * 256] = {};  // byte value of each leaf index
    std::priority_queue<Char> q;
    int idx = 0;
    for (const auto& entry : freq) {
        ++idx;
        huff[idx].cnt = entry.second;
        huff[idx].lson = 0;
        huff[idx].rson = 0;
        symbol_of[idx] = entry.first;
        q.push(Char{entry.first, entry.second, idx});
    }

    const int leaves = idx;
    if (leaves == 0) {
        return;
    }
    if (leaves == 1) {
        code[symbol_of[1]] = "0";
        return;
    }

    // Repeatedly merge the two lightest subtrees into a new internal node.
    while (q.size() > 1) {
        const Char a = q.top(); q.pop();
        const Char b = q.top(); q.pop();
        ++idx;
        huff[idx].cnt = a.freq + b.freq;
        huff[idx].lson = a.node;
        huff[idx].rson = b.node;
        q.push(Char{0, huff[idx].cnt, idx});
    }

    // A child is always created before its parent, so walking the internal
    // nodes from the root (highest index) downwards visits every parent before
    // its children and lets each child extend its parent's finished path.
    std::string path[2 * 256];
    for (int i = idx; i > leaves; --i) {
        path[huff[i].lson] = path[i] + '0';
        path[huff[i].rson] = path[i] + '1';
    }
    for (int i = 1; i <= leaves; ++i) {
        code[symbol_of[i]] = path[i];
    }
}
int main() {
ifstream fin("input.txt", ios::binary);
ofstream fout("output.txt", ios::binary);
unsigned char ch;
while (fin.read((char*) &ch, sizeof(ch))) {
freq[ch]++;
}
fin.clear();
fin.seekg(0, ios::beg);
huff_build();
unsigned char tmp = 0, pos = 7;
for (auto it : freq) {
fout.write((char*) &it.first, sizeof(it.first));
fout.write((char*) &it.second, sizeof(it.second));
}
int cnt = 0;
while (fin.read((char*) &ch, sizeof(ch))) {
string cur = code[ch];
for (int i = 0; i < cur.length(); i++) {
if (cur[i] == '1') {
tmp |= (1 << pos);
}
pos--;
if (pos < 0) {
pos = 7;
fout.write((char*) &tmp, sizeof(tmp));
tmp = 0;
}
}
cnt++;
}
if (pos != 7) {
fout.write((char*) &tmp, sizeof(tmp));
}
fin.close();
fout.close();
return 0;
}
```
解压缩代码:
```c++
#include <iostream>
#include <fstream>
#include <map>
#include <cstring>
using namespace std;
// Capacity of the node pool. A byte-oriented Huffman tree never needs more
// than 2 * 256 - 1 nodes; the original (larger) capacity is kept unchanged.
const int MAXN = 1000005;

// One Huffman tree node stored in the global pool `huff`.
//   cnt  - total frequency of all bytes below this node
//   lson - pool index of the child reached by bit '0' (0 for a leaf)
//   rson - pool index of the child reached by bit '1' (0 for a leaf)
// Pool index 0 is never used as a node, so 0 can mean "no child".
struct Node {
    int cnt, lson, rson;
} huff[MAXN];

// Input of huff_build(): number of occurrences of every byte value.
std::map<unsigned char, int> freq;
// Output of huff_build(): the code of every byte as a string of '0'/'1'.
std::map<unsigned char, std::string> code;

// Rebuilds the Huffman tree for `freq` in `huff` and fills `code`.
//
// The caller must populate `freq` (from the compressed file's frequency
// table) before calling; nothing is read from standard input.
//
// The construction is fully deterministic so that it reproduces the tree the
// compressor built from the same table:
//   - leaves occupy indices 1..n in ascending byte order;
//   - each step merges the two lightest roots, ties resolved in favour of
//     the lower index; the lightest becomes the '0' child, the second
//     lightest the '1' child;
//   - every merge appends one node, so the root is index 2n - 1.
//
// Special cases: an empty `freq` leaves `code` empty; a single distinct byte
// gets the one-bit code "0".
void huff_build() {
    code.clear();

    unsigned char symbol_of[2 * 256] = {};  // byte value of each leaf index
    bool is_root[2 * 256] = {};             // true while a node has no parent
    int idx = 0;
    for (const auto& entry : freq) {
        ++idx;
        huff[idx].cnt = entry.second;
        huff[idx].lson = 0;
        huff[idx].rson = 0;
        symbol_of[idx] = entry.first;
        is_root[idx] = true;
    }

    const int leaves = idx;
    if (leaves == 0) {
        return;
    }
    if (leaves == 1) {
        code[symbol_of[1]] = "0";
        return;
    }

    for (int merges = leaves - 1; merges > 0; --merges) {
        // Find the lightest (a) and second lightest (b) parentless nodes.
        // Scanning in ascending index order with strict comparisons makes
        // the lower index win on equal weight.
        int a = 0;
        int b = 0;
        for (int i = 1; i <= idx; ++i) {
            if (!is_root[i]) {
                continue;
            }
            if (a == 0 || huff[i].cnt < huff[a].cnt) {
                b = a;
                a = i;
            } else if (b == 0 || huff[i].cnt < huff[b].cnt) {
                b = i;
            }
        }

        ++idx;
        huff[idx].cnt = huff[a].cnt + huff[b].cnt;
        huff[idx].lson = a;
        huff[idx].rson = b;
        is_root[a] = false;
        is_root[b] = false;
        is_root[idx] = true;
    }

    // A child is always created before its parent, so walking the internal
    // nodes from the root (highest index) downwards visits every parent before
    // its children and lets each child extend its parent's finished path.
    std::string path[2 * 256];
    for (int i = idx; i > leaves; --i) {
        path[huff[i].lson] = path[i] + '0';
        path[huff[i].rson] = path[i] + '1';
    }
    for (int i = 1; i <= leaves; ++i) {
        code[symbol_of[i]] = path[i];
    }
}
int main() {
ifstream fin("output.txt", ios::binary);
ofstream fout("input.txt", ios::binary);
unsigned char ch, tmp = 0;
int pos = -1, idx = MAXN - 1;
huff_build();
int cnt = huff[idx].cnt;
while (fin.read((char*) &ch, sizeof(ch))) {
for (int i = 0; i < 8; i++) {
if (pos >= 0) {
tmp |= ((ch >> i) & 1) << pos;
pos--;
if (pos < 0) {
huff[idx--].lson = huff[idx--].rson = tmp;
tmp = 0;
pos = -1;
}
} else {
if (ch & (1 << i)) {
idx--;
}
if (huff[idx].lson == 0 && huff[idx].rson == 0) {
fout.write((char*) &huff[idx].lson, sizeof(huff[idx].lson));
idx = MAXN - 1;
cnt--;
if (cnt == 0) {
break;
}
}
}
}
}
fin.close();
fout.close();
return 0;
}
```
需要注意的是，压缩时需要先将字符出现频率表写入输出文件，再写入哈夫曼编码后的数据，以便解压缩时根据频率表重建同一棵哈夫曼树并还原原始内容。
阅读全文