C语言基于哈夫曼树的数据压缩算法代码
时间: 2023-10-04 07:05:54 浏览: 107
C语言-基于哈夫曼树的数据压缩算法.zip
以下是C语言基于哈夫曼树的数据压缩算法代码:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_FREQ 256
typedef struct {
unsigned char ch;
int freq;
} node_t;
node_t heap[MAX_FREQ];
int heap_size = 0;
void swap(node_t *a, node_t *b) {
node_t tmp = *a;
*a = *b;
*b = tmp;
}
void push_heap(node_t x) {
heap[++heap_size] = x;
int i = heap_size;
while (i > 1 && heap[i].freq < heap[i/2].freq) {
swap(&heap[i], &heap[i/2]);
i /= 2;
}
}
node_t pop_heap() {
node_t ret = heap[1];
heap[1] = heap[heap_size--];
int i = 1;
while (i*2 <= heap_size) {
int child = i*2;
if (child+1 <= heap_size && heap[child+1].freq < heap[child].freq)
child++;
if (heap[i].freq < heap[child].freq)
break;
swap(&heap[i], &heap[child]);
i = child;
}
return ret;
}
int freq[MAX_FREQ];
char code[MAX_FREQ][MAX_FREQ];
int code_len[MAX_FREQ];
void build_huffman_tree() {
while (heap_size > 1) {
node_t a = pop_heap();
node_t b = pop_heap();
node_t c = {0, a.freq + b.freq};
push_heap(c);
for (int i = 0; i < MAX_FREQ; i++) {
code[c.ch][i] = '\0';
if (code[a.ch][i] != '\0') {
code[c.ch][i+1] = '\0';
strcat(code[c.ch], code[a.ch]);
code_len[c.ch] = strlen(code[c.ch]);
}
if (code[b.ch][i] != '\0') {
code[c.ch][i+1] = '\0';
strcat(code[c.ch], code[b.ch]);
code_len[c.ch] = strlen(code[c.ch]);
}
}
}
}
int main() {
char str[] = "hello world";
int len = strlen(str);
for (int i = 0; i < len; i++)
freq[str[i]]++;
for (int i = 0; i < MAX_FREQ; i++) {
if (freq[i] > 0) {
node_t n = {i, freq[i]};
push_heap(n);
}
}
build_huffman_tree();
printf("Original string: %s\n", str);
printf("Huffman code:\n");
for (int i = 0; i < len; i++) {
printf("%s", code[str[i]]);
}
printf("\n");
return 0;
}
```
该程序首先统计给定字符串中每个字符出现的频率,然后使用哈夫曼树算法将字符编码为可变长度的二进制编码。程序使用了一个最小堆来维护频率最小的节点,因此该程序的时间复杂度为 $O(n \log n)$,其中 $n$ 是字符集大小。
阅读全文