用C语言利用Huffman树和Huffman编码实现一个无损数据压缩程序
时间: 2024-03-10 11:47:40 浏览: 194
C语言实现Huffman树,Huffman编码
5星 · 资源好评率100%
好的,这是一个非常有趣的问题。实现一个基于Huffman树和Huffman编码的无损数据压缩程序,可以分为以下几个步骤:
1. 统计每个字符出现的频率,构建Huffman树
首先,需要对待压缩的数据进行遍历,统计每个字符出现的频率。然后,使用这些频率构建Huffman树。这个过程可以使用堆来实现,具体实现可以参考哈夫曼编码的构造算法。
2. 生成Huffman编码表
构建好Huffman树之后,可以根据Huffman树的特性,生成每个字符对应的Huffman编码。具体实现可以使用递归的方式,在遍历Huffman树的过程中生成Huffman编码表。
3. 将数据进行压缩
有了Huffman编码表之后,就可以对待压缩的数据进行压缩了。具体实现可以将数据中的每个字符替换成对应的Huffman编码,然后将所有的编码拼接在一起,形成一个二进制字符串。
4. 将压缩后的数据写入文件
最后,将压缩后的二进制字符串写入文件中即可。
这里提供一份参考代码,仅供参考:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_CHAR_NUM 256
#define MAX_BIT_NUM 1000000
typedef struct TreeNode {
unsigned char data; // 存储字符
int freq; // 存储字符出现的频率
struct TreeNode *left;
struct TreeNode *right;
} TreeNode;
typedef struct Heap {
int size;
TreeNode *data[MAX_CHAR_NUM*2];
} Heap;
typedef struct HuffmanCode {
unsigned char data;
char code[MAX_BIT_NUM];
} HuffmanCode;
void swap(TreeNode **a, TreeNode **b) {
TreeNode *temp = *a;
*a = *b;
*b = temp;
}
void minHeapify(Heap *heap, int index) {
int left = index * 2, right = index * 2 + 1, smallest = index;
if (left <= heap->size && heap->data[left]->freq < heap->data[smallest]->freq)
smallest = left;
if (right <= heap->size && heap->data[right]->freq < heap->data[smallest]->freq)
smallest = right;
if (smallest != index) {
swap(&heap->data[index], &heap->data[smallest]);
minHeapify(heap, smallest);
}
}
void buildMinHeap(Heap *heap) {
int i;
for (i = heap->size / 2; i >= 1; i--)
minHeapify(heap, i);
}
TreeNode *createTreeNode(unsigned char data, int freq) {
TreeNode *newNode = (TreeNode*) malloc(sizeof(TreeNode));
newNode->data = data;
newNode->freq = freq;
newNode->left = NULL;
newNode->right = NULL;
return newNode;
}
Heap *createHeap() {
Heap *newHeap = (Heap*) malloc(sizeof(Heap));
newHeap->size = 0;
return newHeap;
}
void insertHeap(Heap *heap, TreeNode *node) {
heap->size++;
heap->data[heap->size] = node;
int i = heap->size;
while (i > 1 && heap->data[i]->freq < heap->data[i/2]->freq) {
swap(&heap->data[i], &heap->data[i/2]);
i = i / 2;
}
}
TreeNode *deleteMin(Heap *heap) {
TreeNode *minNode = heap->data[1];
heap->data[1] = heap->data[heap->size];
heap->size--;
minHeapify(heap, 1);
return minNode;
}
TreeNode *buildHuffmanTree(int freq[]) {
int i;
Heap *heap = createHeap();
for (i = 0; i < MAX_CHAR_NUM; i++)
if (freq[i] > 0)
insertHeap(heap, createTreeNode(i, freq[i]));
buildMinHeap(heap);
while (heap->size > 1) {
TreeNode *left = deleteMin(heap);
TreeNode *right = deleteMin(heap);
TreeNode *parent = createTreeNode('$', left->freq + right->freq);
parent->left = left;
parent->right = right;
insertHeap(heap, parent);
}
return deleteMin(heap);
}
void generateHuffmanCode(TreeNode *root, char code[], int depth, HuffmanCode huffmanCodes[]) {
if (root->left == NULL && root->right == NULL) {
huffmanCodes[root->data].data = root->data;
strcpy(huffmanCodes[root->data].code, code);
return;
}
code[depth] = '0';
generateHuffmanCode(root->left, code, depth+1, huffmanCodes);
code[depth] = '1';
generateHuffmanCode(root->right, code, depth+1, huffmanCodes);
}
void compressFile(char *inputFile, char *outputFile, HuffmanCode huffmanCodes[]) {
FILE *fin = fopen(inputFile, "rb");
FILE *fout = fopen(outputFile, "wb");
int freq[MAX_CHAR_NUM], i;
memset(freq, 0, sizeof(freq));
unsigned char c;
while (fread(&c, 1, 1, fin) > 0)
freq[c]++;
fseek(fin, 0, SEEK_SET);
TreeNode *root = buildHuffmanTree(freq);
char code[MAX_BIT_NUM];
generateHuffmanCode(root, code, 0, huffmanCodes);
int bitCount = 0;
char buffer = 0;
while (fread(&c, 1, 1, fin) > 0) {
for (i = 0; i < strlen(huffmanCodes[c].code); i++) {
buffer = buffer << 1;
if (huffmanCodes[c].code[i] == '1')
buffer |= 1;
bitCount++;
if (bitCount == 8) {
fwrite(&buffer, 1, 1, fout);
bitCount = 0;
buffer = 0;
}
}
}
if (bitCount > 0) {
buffer = buffer << (8 - bitCount);
fwrite(&buffer, 1, 1, fout);
}
fclose(fin);
fclose(fout);
}
void decompressFile(char *inputFile, char *outputFile, TreeNode *root) {
FILE *fin = fopen(inputFile, "rb");
FILE *fout = fopen(outputFile, "wb");
unsigned char c;
TreeNode *p = root;
while (fread(&c, 1, 1, fin) > 0) {
int i;
for (i = 7; i >= 0; i--) {
if ((c & (1 << i)) == 0)
p = p->left;
else
p = p->right;
if (p->left == NULL && p->right == NULL) {
fwrite(&p->data, 1, 1, fout);
p = root;
}
}
}
fclose(fin);
fclose(fout);
}
int main() {
char inputFile[] = "input.txt";
char compressedFile[] = "compressed.bin";
char decompressedFile[] = "decompressed.txt";
HuffmanCode huffmanCodes[MAX_CHAR_NUM];
memset(huffmanCodes, 0, sizeof(huffmanCodes));
compressFile(inputFile, compressedFile, huffmanCodes);
TreeNode *root = buildHuffmanTree(NULL);
decompressFile(compressedFile, decompressedFile, root);
return 0;
}
```
这份代码使用了堆来构建Huffman树,使用了递归的方式来生成Huffman编码表,并且实现了对文件进行压缩和解压缩的功能。需要注意的是,对于一个字符集大小为N的情况,Huffman树的构建时间复杂度为O(NlogN),压缩和解压缩的时间复杂度也为O(NlogN)。
阅读全文