费诺编码实现文本压缩C++
时间: 2023-08-12 19:12:13 浏览: 109
费诺编码是一种无损压缩算法,用于将文本数据压缩到更小的空间中。下面是一个简单的C++实现:
```cpp
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
// 定义一个结构体用于存储字符及出现次数
struct CharCount {
char c;
int count;
};
// 比较函数,按出现次数从大到小排序
bool cmp(const CharCount& a, const CharCount& b) {
return a.count > b.count;
}
// 计算字符出现次数
vector<CharCount> getCharCount(const string& str) {
vector<CharCount> charCount(256);
for (int i = 0; i < 256; i++) {
charCount[i].c = i;
charCount[i].count = 0;
}
for (auto c : str) {
charCount[c].count++;
}
sort(charCount.begin(), charCount.end(), cmp);
return charCount;
}
// 构建费诺树
struct Node {
int index;
int left;
int right;
};
vector<Node> buildFenwickTree(const vector<CharCount>& charCount) {
vector<Node> fenwickTree(charCount.size() * 2 - 1);
for (int i = 0; i < charCount.size(); i++) {
fenwickTree[i].index = i;
fenwickTree[i].left = -1;
fenwickTree[i].right = -1;
}
int n = charCount.size();
for (int i = n; i < fenwickTree.size(); i++) {
int min1 = -1, min2 = -1;
for (int j = 0; j < i; j++) {
if (fenwickTree[j].left == -1 && (min1 == -1 || charCount[fenwickTree[j].index].count < charCount[fenwickTree[min1].index].count)) {
min2 = min1;
min1 = j;
}
else if (fenwickTree[j].left == -1 && (min2 == -1 || charCount[fenwickTree[j].index].count < charCount[fenwickTree[min2].index].count)) {
min2 = j;
}
}
fenwickTree[i].index = n + i - charCount.size();
fenwickTree[i].left = min1;
fenwickTree[i].right = min2;
charCount[fenwickTree[i].index].count = charCount[fenwickTree[min1].index].count + charCount[fenwickTree[min2].index].count;
}
return fenwickTree;
}
// 递归获取费诺编码
void getFenwickCode(const vector<Node>& fenwickTree, vector<string>& code, string curCode, int index) {
if (fenwickTree[index].left == -1 && fenwickTree[index].right == -1) {
code[fenwickTree[index].index] = curCode;
return;
}
getFenwickCode(fenwickTree, code, curCode + '0', fenwickTree[index].left);
getFenwickCode(fenwickTree, code, curCode + '1', fenwickTree[index].right);
}
// 获取费诺编码
vector<string> getFenwickCode(const vector<CharCount>& charCount) {
vector<Node> fenwickTree = buildFenwickTree(charCount);
vector<string> code(charCount.size());
getFenwickCode(fenwickTree, code, "", fenwickTree.size() - 1);
return code;
}
// 压缩函数
string compress(const string& str) {
vector<CharCount> charCount = getCharCount(str);
vector<string> code = getFenwickCode(charCount);
string compressedStr;
for (auto c : str) {
compressedStr += code[c];
}
return compressedStr;
}
// 解压函数
string decompress(const string& compressedStr, const vector<CharCount>& charCount) {
vector<Node> fenwickTree = buildFenwickTree(charCount);
string originalStr;
int index = fenwickTree.size() - 1;
for (auto c : compressedStr) {
if (c == '0') {
index = fenwickTree[index].left;
}
else {
index = fenwickTree[index].right;
}
if (fenwickTree[index].left == -1 && fenwickTree[index].right == -1) {
originalStr += fenwickTree[index].index;
index = fenwickTree.size() - 1;
}
}
return originalStr;
}
int main() {
string str = "abracadabra";
string compressedStr = compress(str);
cout << "Compressed string: " << compressedStr << endl;
vector<CharCount> charCount = getCharCount(str);
string originalStr = decompress(compressedStr, charCount);
cout << "Original string: " << originalStr << endl;
return 0;
}
```
上述代码实现了费诺编码的压缩和解压缩。其中,`getCharCount()`函数用于计算字符出现次数,`buildFenwickTree()`函数用于构建费诺树,`getFenwickCode()`函数用于递归获取费诺编码,`compress()`函数用于压缩,`decompress()`函数用于解压缩。
阅读全文