C语言实现哈夫曼编码压缩
时间: 2023-07-26 13:13:08 浏览: 60
好的,实现哈夫曼编码压缩的步骤如下:
1. 统计字符频率:遍历待压缩的文件,统计每个字符出现的次数,将结果存储在一个频率表中。
2. 构建哈夫曼树:根据字符频率表构建哈夫曼树,构建过程中需要用到最小堆。
3. 生成编码表:根据哈夫曼树,生成每个字符对应的哈夫曼编码,将结果存储在一个编码表中。
4. 压缩文件:遍历待压缩的文件,将每个字符替换为其对应的哈夫曼编码,重新写入一个压缩后的文件中。
下面是一个简单的C语言实现:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Number of ints in the scratch array that HuffmanCodes passes to printCodes
 * to record the 0/1 path from the root to the current node. printCodes does
 * not check this bound itself. */
#define MAX_TREE_HT 100
/* A node of the Huffman tree. The same nodes are the elements stored in the
 * min-heap while the tree is being assembled. */
struct MinHeapNode {
    char data;                  /* symbol carried by the node ('$' on merged nodes) */
    unsigned freq;              /* weight the heap orders nodes by */
    struct MinHeapNode *left;   /* subtree labelled 0 by printCodes, or NULL */
    struct MinHeapNode *right;  /* subtree labelled 1 by printCodes, or NULL */
};
/* Binary min-heap of tree-node pointers, stored as an implicit tree in a
 * flat array (children of slot i live at 2*i+1 and 2*i+2). */
struct MinHeap {
    unsigned size;               /* number of slots currently in use */
    unsigned capacity;           /* number of slots allocated in array */
    struct MinHeapNode **array;  /* the slots themselves */
};
/*
 * Allocates a childless tree node holding `data` with weight `freq`.
 *
 * Returns the new node, or NULL if the allocation fails. The node comes
 * from malloc, so whoever ends up owning it releases it with free().
 */
struct MinHeapNode* newNode(char data, unsigned freq)
{
    struct MinHeapNode* node = malloc(sizeof *node);

    /* Report allocation failure instead of writing through a null pointer. */
    if (node == NULL) {
        return NULL;
    }

    node->data = data;
    node->freq = freq;
    node->left = NULL;
    node->right = NULL;
    return node;
}
/*
 * Creates an empty min-heap with room for `capacity` node pointers.
 *
 * Returns the heap, or NULL if either allocation fails (nothing is leaked
 * in that case). The caller releases the heap with free(heap->array)
 * followed by free(heap).
 */
struct MinHeap* createMinHeap(unsigned capacity)
{
    struct MinHeap* heap = malloc(sizeof *heap);
    if (heap == NULL) {
        return NULL;
    }

    heap->size = 0;
    heap->capacity = capacity;

    /* calloc guards the count * size multiplication against overflow and
     * leaves every slot NULL. */
    heap->array = calloc(capacity, sizeof *heap->array);

    /* A zero-sized request may legitimately yield NULL; only a non-empty
     * request that comes back NULL is a failure. */
    if (heap->array == NULL && capacity != 0) {
        free(heap);
        return NULL;
    }

    return heap;
}
/* Exchanges the two node pointers stored at `a` and `b`. */
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b)
{
    struct MinHeapNode* const first = *a;
    struct MinHeapNode* const second = *b;

    *a = second;
    *b = first;
}
/*
 * Sifts the node at slot `idx` down until neither child has a smaller
 * freq, assuming both subtrees below `idx` already satisfy the heap
 * ordering.
 */
void minHeapify(struct MinHeap* minHeap, int idx)
{
    for (;;) {
        const int leftChild = 2 * idx + 1;
        const int rightChild = 2 * idx + 2;
        int lowest = idx;

        if (leftChild < minHeap->size
            && minHeap->array[leftChild]->freq < minHeap->array[lowest]->freq) {
            lowest = leftChild;
        }
        if (rightChild < minHeap->size
            && minHeap->array[rightChild]->freq < minHeap->array[lowest]->freq) {
            lowest = rightChild;
        }

        /* The node is no larger than its children: ordering restored. */
        if (lowest == idx) {
            return;
        }

        swapMinHeapNode(&minHeap->array[lowest], &minHeap->array[idx]);
        idx = lowest;
    }
}
/* Returns 1 when the heap holds exactly one node, 0 otherwise. */
int isSizeOne(struct MinHeap* minHeap)
{
    const unsigned count = minHeap->size;

    return count == 1;
}
/*
 * Removes and returns the node with the smallest freq.
 *
 * Returns NULL when the heap is empty; without that guard `size - 1`
 * would wrap around (size is unsigned) and index far outside the array.
 */
struct MinHeapNode* extractMin(struct MinHeap* minHeap)
{
    if (minHeap->size == 0) {
        return NULL;
    }

    struct MinHeapNode* smallest = minHeap->array[0];

    /* Move the last node into the root slot, shrink, then sift it down. */
    minHeap->size--;
    minHeap->array[0] = minHeap->array[minHeap->size];
    minHeapify(minHeap, 0);

    return smallest;
}
/*
 * Inserts `minHeapNode` into the heap, keeping the smallest freq at the root.
 *
 * When the array is already full it is grown (doubled) first, so the
 * insertion can no longer write past the allocation. If growing fails the
 * heap is left untouched and the node is NOT inserted; the caller still
 * owns it.
 */
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode)
{
    if (minHeap->size >= minHeap->capacity) {
        const unsigned newCapacity =
            minHeap->capacity != 0 ? minHeap->capacity * 2 : 1;

        /* Reject a wrapped-around capacity or a byte count that would
         * overflow size_t. */
        if (newCapacity <= minHeap->capacity
            || newCapacity > (size_t)-1 / sizeof *minHeap->array) {
            return;
        }

        /* Keep the old pointer until realloc is known to have succeeded. */
        struct MinHeapNode** grown =
            realloc(minHeap->array, newCapacity * sizeof *grown);
        if (grown == NULL) {
            return;
        }
        minHeap->array = grown;
        minHeap->capacity = newCapacity;
    }

    /* Open a hole in the last slot and bubble it up past larger parents. */
    unsigned hole = minHeap->size++;
    while (hole != 0) {
        const unsigned parent = (hole - 1) / 2;
        if (minHeapNode->freq >= minHeap->array[parent]->freq) {
            break;
        }
        minHeap->array[hole] = minHeap->array[parent];
        hole = parent;
    }
    minHeap->array[hole] = minHeapNode;
}
/*
 * Establishes the heap ordering over the slots already stored in the
 * array by sifting down every non-leaf slot, from the last parent back
 * to the root.
 */
void buildMinHeap(struct MinHeap* minHeap)
{
    const int last = minHeap->size - 1;
    int parent = (last - 1) / 2;

    while (parent >= 0) {
        minHeapify(minHeap, parent);
        --parent;
    }
}
/* Prints the first `n` ints of `arr` back to back (no separator), then a
 * newline. */
void printArr(int arr[], int n)
{
    int pos = 0;

    while (pos < n) {
        printf("%d", arr[pos]);
        ++pos;
    }
    printf("\n");
}
/* Returns 1 when `root` has neither a left nor a right child, else 0. */
int isLeaf(struct MinHeapNode* root)
{
    if (root->left != NULL) {
        return 0;
    }
    return root->right == NULL;
}
/*
 * Builds a min-heap holding one leaf node per symbol: data[i] is paired
 * with freq[i] for every i in [0, size).
 */
struct MinHeap* createAndBuildMinHeap(char data[], int freq[], int size)
{
    struct MinHeap* heap = createMinHeap(size);
    int slot = 0;

    /* Fill the slots in input order first; the ordering is fixed up in a
     * single pass afterwards. */
    while (slot < size) {
        heap->array[slot] = newNode(data[slot], freq[slot]);
        ++slot;
    }
    heap->size = size;

    buildMinHeap(heap);
    return heap;
}
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size)
{
struct MinHeapNode *left, *right, *top;
struct MinHeap* minHeap = createAndBuildMinHeap(data, freq, size);
while (!isSizeOne(minHeap)) {
left = extractMin(minHeap);
right = extractMin(minHeap);
top = newNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
return extractMin(minHeap);
}
/*
 * Walks the tree under `root` and prints one line per leaf: the leaf's
 * symbol, ": ", then the 0/1 path leading to it. `arr` holds the path
 * taken so far and `top` is its length; a left step records 0 and a right
 * step records 1.
 */
void printCodes(struct MinHeapNode* root, int arr[], int top)
{
    /* A leaf ends the path: print it and stop. */
    if (root->left == NULL && root->right == NULL) {
        printf("%c: ", root->data);
        printArr(arr, top);
        return;
    }

    if (root->left != NULL) {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }
    if (root->right != NULL) {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }
}
void HuffmanCodes(char data[], int freq[], int size)
{
struct MinHeapNode* root
= buildHuffmanTree(data, freq, size);
int arr[MAX_TREE_HT], top = 0;
printCodes(root, arr, top);
}
void compress(char* input, char* output)
{
FILE* fp1 = fopen(input, "r");
if (fp1 == NULL) {
printf("File Not Found!\n");
return;
}
FILE* fp2 = fopen(output, "w");
char ch;
int freq[256] = { 0 };
while ((ch = fgetc(fp1)) != EOF) {
freq[ch]++;
}
char data[256] = { 0 };
int j = 0;
for (int i = 0; i < 256; i++) {
if (freq[i] != 0) {
data[j] = (char)i;
j++;
}
}
HuffmanCodes(data, freq, j);
rewind(fp1);
char buffer = 0, bit_count = 0;
while ((ch = fgetc(fp1)) != EOF) {
int len = strlen(codes[ch]);
for (int i = 0; i < len; i++) {
buffer = buffer << 1;
if (codes[ch][i] == '1') {
buffer = buffer | 1;
}
bit_count++;
if (bit_count == 8) {
fwrite(&buffer, 1, 1, fp2);
buffer = 0;
bit_count = 0;
}
}
}
if (bit_count != 0) {
buffer = buffer << (8 - bit_count);
fwrite(&buffer, 1, 1, fp2);
}
fclose(fp1);
fclose(fp2);
}
/* Entry point: compresses test.txt into test.huff. */
int main(void)
{
    char inputPath[] = "test.txt";
    char outputPath[] = "test.huff";

    compress(inputPath, outputPath);
    return 0;
}
```
其中,compress函数实现了文件压缩的过程:先统计字符频率,再根据字符频率构建哈夫曼树,生成每个字符对应的哈夫曼编码,最后将每个字符替换为其对应的哈夫曼编码,按位打包后写入压缩文件。需要注意的是,该示例没有把频率表或编码表写入输出文件,因此仅凭压缩后的文件无法还原原始内容;实际使用时应在文件头保存这些信息,以便解压时重建哈夫曼树。