词频统计哈夫曼编码译码c++
时间: 2024-05-14 14:10:46 浏览: 163
词频统计和哈夫曼编码是两个不同的概念,我可以分别为你介绍一下。
1. 词频统计
词频统计是指对一段文本进行分词,然后统计每个词语在文本中出现的频率。在C++中,可以使用STL中的map容器来实现词频统计,具体实现可以参考如下代码:
```
#include <iostream>
#include <map>
#include <string>
using namespace std;
int main()
{
map<string, int> wordFreq;
string text = "Hello, world! This is a test text.";
// 将文本按照空格分割为单词
int startPos = 0;
for (int i = 0; i < text.length(); i++) {
if (text[i] == ' ') {
string word = text.substr(startPos, i - startPos);
if (wordFreq.find(word) != wordFreq.end()) {
wordFreq[word]++;
} else {
wordFreq[word] = 1;
}
startPos = i + 1;
}
}
string word = text.substr(startPos);
if (wordFreq.find(word) != wordFreq.end()) {
wordFreq[word]++;
} else {
wordFreq[word] = 1;
}
// 输出每个单词的出现频率
for (auto it = wordFreq.begin(); it != wordFreq.end(); it++) {
cout << it->first << ": " << it->second << endl;
}
return 0;
}
```
2. 哈夫曼编码与译码
哈夫曼编码是一种用于数据压缩的算法,它通过将频率较高的字符用较短的编码表示,从而减小数据的存储空间。哈夫曼编码的过程包括两个步骤:构建哈夫曼树和生成编码。而哈夫曼译码则是将编码转换为原始数据的过程。
在C++中,可以先构建一个哈夫曼树,然后根据哈夫曼树生成每个字符对应的编码表,最后使用编码表对原始数据进行编码和译码。具体实现可以参考如下代码:
```
#include <iostream>
#include <queue>
#include <unordered_map>
#include <vector>
using namespace std;
// 定义哈夫曼树节点
struct Node {
char ch;
int freq;
Node *left, *right;
Node(char ch, int freq)
: ch(ch)
, freq(freq)
, left(nullptr)
, right(nullptr)
{
}
};
// 定义哈夫曼树节点比较函数
struct NodeCmp {
bool operator()(Node* a, Node* b) const { return a->freq > b->freq; }
};
// 构建哈夫曼树
Node* buildHuffmanTree(const string& data)
{
unordered_map<char, int> freqMap;
for (char ch : data) {
freqMap[ch]++;
}
priority_queue<Node*, vector<Node*>, NodeCmp> pq;
for (auto it = freqMap.begin(); it != freqMap.end(); it++) {
pq.push(new Node(it->first, it->second));
}
while (pq.size() > 1) {
Node* left = pq.top();
pq.pop();
Node* right = pq.top();
pq.pop();
Node* parent = new Node('\0', left->freq + right->freq);
parent->left = left;
parent->right = right;
pq.push(parent);
}
return pq.top();
}
// 生成哈夫曼编码表
void generateHuffmanCodeTable(Node* root, string code, unordered_map<char, string>& codeTable)
{
if (!root) {
return;
}
if (!root->left && !root->right) {
codeTable[root->ch] = code;
return;
}
generateHuffmanCodeTable(root->left, code + "0", codeTable);
generateHuffmanCodeTable(root->right, code + "1", codeTable);
}
// 对数据进行哈夫曼编码
string huffmanEncode(const string& data, const unordered_map<char, string>& codeTable)
{
string encodedData;
for (char ch : data) {
encodedData += codeTable.at(ch);
}
return encodedData;
}
// 对哈夫曼编码进行译码
string huffmanDecode(const string& encodedData, Node* root)
{
string decodedData;
Node* curNode = root;
for (char ch : encodedData) {
if (ch == '0') {
curNode = curNode->left;
} else {
curNode = curNode->right;
}
if (!curNode->left && !curNode->right) {
decodedData += curNode->ch;
curNode = root;
}
}
return decodedData;
}
int main()
{
string data = "hello world";
Node* huffmanTree = buildHuffmanTree(data);
unordered_map<char, string> codeTable;
generateHuffmanCodeTable(huffmanTree, "", codeTable);
string encodedData = huffmanEncode(data, codeTable);
cout << "Encoded data: " << encodedData << endl;
string decodedData = huffmanDecode(encodedData, huffmanTree);
cout << "Decoded data: " << decodedData << endl;
return 0;
}
```
阅读全文