帮我用c语言写一个利用Huffan编码实现文件压缩的代码
时间: 2024-03-15 21:47:08 浏览: 158
好的,以下是一个简单的利用Huffman编码实现文件压缩的C语言代码:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// 定义Huffman树节点
typedef struct huffman_node {
int freq;
char data;
struct huffman_node *left;
struct huffman_node *right;
} huffman_node;
// 定义堆节点
typedef struct heap_node {
int size;
int capacity;
huffman_node **array;
} heap_node;
// 创建Huffman树节点
huffman_node *create_huffman_node(char data, int freq) {
huffman_node *node = (huffman_node *)malloc(sizeof(huffman_node));
node->data = data;
node->freq = freq;
node->left = NULL;
node->right = NULL;
return node;
}
// 创建堆节点
heap_node *create_heap(int capacity) {
heap_node *heap = (heap_node *)malloc(sizeof(heap_node));
heap->size = 0;
heap->capacity = capacity;
heap->array = (huffman_node **)malloc(sizeof(huffman_node *) * capacity);
return heap;
}
// 交换堆节点
void swap(huffman_node **a, huffman_node **b) {
huffman_node *temp = *a;
*a = *b;
*b = temp;
}
// 最小堆调整
void heapify(heap_node *heap, int index) {
int smallest = index;
int left = 2 * index + 1;
int right = 2 * index + 2;
if (left < heap->size && heap->array[left]->freq < heap->array[smallest]->freq) {
smallest = left;
}
if (right < heap->size && heap->array[right]->freq < heap->array[smallest]->freq) {
smallest = right;
}
if (smallest != index) {
swap(&heap->array[smallest], &heap->array[index]);
heapify(heap, smallest);
}
}
// 判断堆是否为空
int is_empty(heap_node *heap) {
return heap->size == 0;
}
// 取出堆中最小的节点
huffman_node *pop_min(heap_node *heap) {
huffman_node *min = heap->array[0];
heap->array[0] = heap->array[heap->size - 1];
heap->size--;
heapify(heap, 0);
return min;
}
// 插入节点到堆中
void insert_min_heap(heap_node *heap, huffman_node *node) {
heap->size++;
int i = heap->size - 1;
while (i > 0 && node->freq < heap->array[(i - 1) / 2]->freq) {
heap->array[i] = heap->array[(i - 1) / 2];
i = (i - 1) / 2;
}
heap->array[i] = node;
}
// 判断节点是否是叶子节点
int is_leaf(huffman_node *node) {
return node->left == NULL && node->right == NULL;
}
// 创建Huffman树
huffman_node *build_huffman_tree(char data[], int freq[], int size) {
heap_node *heap = create_heap(size);
for (int i = 0; i < size; i++) {
huffman_node *node = create_huffman_node(data[i], freq[i]);
insert_min_heap(heap, node);
}
while (heap->size > 1) {
huffman_node *left = pop_min(heap);
huffman_node *right = pop_min(heap);
huffman_node *parent = create_huffman_node('\0', left->freq + right->freq);
parent->left = left;
parent->right = right;
insert_min_heap(heap, parent);
}
return pop_min(heap);
}
// 打印Huffman编码
void print_huffman_codes(huffman_node *root, int arr[], int top) {
if (root->left) {
arr[top] = 0;
print_huffman_codes(root->left, arr, top + 1);
}
if (root->right) {
arr[top] = 1;
print_huffman_codes(root->right, arr, top + 1);
}
if (is_leaf(root)) {
printf("%c: ", root->data);
for (int i = 0; i < top; i++) {
printf("%d", arr[i]);
}
printf("\n");
}
}
// 压缩文件
void compress_file(char *input_file, char *output_file) {
FILE *in_fp = fopen(input_file, "r");
FILE *out_fp = fopen(output_file, "w");
char ch;
int freq[256] = {0};
int total_chars = 0;
while ((ch = fgetc(in_fp)) != EOF) {
freq[ch]++;
total_chars++;
}
char data[256];
int index = 0;
for (int i = 0; i < 256; i++) {
if (freq[i] != 0) {
data[index++] = (char)i;
}
}
huffman_node *root = build_huffman_tree(data, freq, index);
int arr[256];
print_huffman_codes(root, arr, 0);
fseek(in_fp, 0, SEEK_SET);
char buffer = 0;
int bits_written = 0;
while ((ch = fgetc(in_fp)) != EOF) {
int *code = (int *)malloc(sizeof(int) * 256);
int bits_to_write = 0;
huffman_node *temp = root;
while (temp != NULL) {
if (temp->left == NULL && temp->right == NULL) {
code[bits_to_write] = -1;
break;
} else {
int bit = (ch >> (7 - bits_written)) & 1;
if (bit == 0) {
temp = temp->left;
} else {
temp = temp->right;
}
code[bits_to_write] = bit;
bits_to_write++;
bits_written++;
if (bits_written == 8) {
bits_written = 0;
ch = fgetc(in_fp);
}
}
}
for (int i = 0; i < bits_to_write; i++) {
if (code[i] == -1) {
break;
}
buffer = buffer << 1;
buffer = buffer | code[i];
bits_written++;
if (bits_written == 8) {
fwrite(&buffer, sizeof(char), 1, out_fp);
bits_written = 0;
buffer = 0;
}
}
free(code);
}
if (bits_written != 0) {
buffer = buffer << (8 - bits_written);
fwrite(&buffer, sizeof(char), 1, out_fp);
}
fclose(in_fp);
fclose(out_fp);
}
int main() {
char input_file[] = "input.txt";
char output_file[] = "output.bin";
compress_file(input_file, output_file);
return 0;
}
```
在这个代码中,我们定义了一个`huffman_node`结构体来表示Huffman树的节点,并定义了一个`heap_node`结构体来表示堆的节点。我们使用最小堆来构建Huffman树。我们首先将每个字符出现的频率存储到一个数组`freq[]`中,然后创建一个`huffman_node`节点的堆,将所有非零频率字符的节点插入到堆中。我们然后取出堆中最小的两个节点,将它们合并成一个父节点,并将父节点插入到堆中。重复这个过程,直到只剩下一个节点,这个节点就是Huffman树的根节点。
我们然后使用深度优先搜索遍历Huffman树,打印出每个字符的Huffman编码。我们使用一个数组`arr[]`来存储从根节点到当前节点的路径,如果当前节点是叶子节点,则打印出该节点的字符和它的Huffman编码。
在压缩文件时,我们首先读取输入文件中的每个字符,并使用Huffman编码替换它。我们使用一个`code[]`数组来存储字符的Huffman编码,并使用一个变量`bits_to_write`来记录编码中的位数。我们使用一个指针`temp`来遍历Huffman树,每次将当前字符的一个比特位添加到`code[]`数组中,并根据它向左或向右遍历Huffman树。当`temp`指向一个叶子节点时,我们将`code[]`数组中的所有比特位写入到输出文件中,直到遇到一个-1(它是`code[]`数组中的结束标志)。
我们使用一个`buffer`变量来缓存每个比特位,并在`bits_written`达到8时将`buffer`写入到输出文件中。如果输入文件中的字符不是8的倍数,则在最后一个字节中使用0填充。
阅读全文