用C语言写一篇哈夫曼编码,具有编码,打印编码,二进制译码功能,并注解
时间: 2023-07-19 14:53:49 浏览: 91
以下是一个使用 C 语言实现的哈夫曼编码与解码器,注释已经加上:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define MAX_SIZE 256 // 字符集大小
// 哈夫曼树节点
typedef struct node {
char c; // 字符
int freq; // 出现频率
struct node* left; // 左子节点
struct node* right; // 右子节点
} Node;
// 编码表节点
typedef struct code {
char c; // 字符
char* bits; // 二进制编码
} Code;
// 哈夫曼树节点堆
typedef struct heap {
int size; // 堆大小
Node** nodes; // 节点数组
} Heap;
// 创建哈夫曼树节点
Node* new_node(char c, int freq) {
Node* node = (Node*)malloc(sizeof(Node));
node->c = c;
node->freq = freq;
node->left = NULL;
node->right = NULL;
return node;
}
// 创建编码表节点
Code* new_code(char c, char* bits) {
Code* code = (Code*)malloc(sizeof(Code));
code->c = c;
code->bits = bits;
return code;
}
// 创建节点堆
Heap* new_heap(int capacity) {
Heap* heap = (Heap*)malloc(sizeof(Heap));
heap->size = 0;
heap->nodes = (Node**)malloc(sizeof(Node*) * capacity);
return heap;
}
// 向节点堆中插入节点
void heap_push(Heap* heap, Node* node) {
heap->nodes[heap->size++] = node;
int i = heap->size - 1;
while (i > 0) {
int j = (i - 1) / 2;
if (heap->nodes[i]->freq < heap->nodes[j]->freq) {
Node* temp = heap->nodes[i];
heap->nodes[i] = heap->nodes[j];
heap->nodes[j] = temp;
i = j;
} else {
break;
}
}
}
// 从节点堆中取出节点
Node* heap_pop(Heap* heap) {
if (heap->size == 0) {
return NULL;
}
Node* node = heap->nodes[0];
heap->nodes[0] = heap->nodes[heap->size - 1];
heap->size--;
int i = 0;
while (i * 2 + 1 < heap->size) {
int j = i * 2 + 1;
if (j + 1 < heap->size && heap->nodes[j]->freq > heap->nodes[j + 1]->freq) {
j++;
}
if (heap->nodes[i]->freq > heap->nodes[j]->freq) {
Node* temp = heap->nodes[i];
heap->nodes[i] = heap->nodes[j];
heap->nodes[j] = temp;
i = j;
} else {
break;
}
}
return node;
}
// 构建哈夫曼树
Node* build_tree(int* freq) {
Heap* heap = new_heap(MAX_SIZE);
for (int i = 0; i < MAX_SIZE; i++) {
if (freq[i] > 0) {
Node* node = new_node((char)i, freq[i]);
heap_push(heap, node);
}
}
while (heap->size > 1) {
Node* left = heap_pop(heap);
Node* right = heap_pop(heap);
Node* parent = new_node('\0', left->freq + right->freq);
parent->left = left;
parent->right = right;
heap_push(heap, parent);
}
Node* root = heap_pop(heap);
free(heap->nodes);
free(heap);
return root;
}
// 生成编码表
Code** get_codes(Node* root) {
Code** codes = (Code**)malloc(sizeof(Code*) * MAX_SIZE);
char bits[MAX_SIZE];
memset(bits, 0, sizeof(bits));
int len = 0;
get_codes_helper(root, codes, bits, len);
return codes;
}
// 生成编码表
void get_codes_helper(Node* node, Code** codes, char* bits, int len) {
if (node == NULL) {
return;
}
if (node->left == NULL && node->right == NULL) {
char* code_bits = (char*)malloc(sizeof(char) * (len + 1));
memcpy(code_bits, bits, sizeof(char) * len);
code_bits[len] = '\0';
codes[(int)node->c] = new_code(node->c, code_bits);
return;
}
bits[len] = '0';
get_codes_helper(node->left, codes, bits, len + 1);
bits[len] = '1';
get_codes_helper(node->right, codes, bits, len + 1);
}
// 释放编码表
void free_codes(Code** codes) {
for (int i = 0; i < MAX_SIZE; i++) {
if (codes[i] != NULL) {
free(codes[i]->bits);
free(codes[i]);
}
}
free(codes);
}
// 获取编码串长度
int get_encoded_size(char* text, Code** codes) {
int len = 0;
for (int i = 0; i < strlen(text); i++) {
len += strlen(codes[(int)text[i]]->bits);
}
return len;
}
// 编码文本
char* encode(char* text, Code** codes) {
int len = get_encoded_size(text, codes);
char* encoded = (char*)malloc(sizeof(char) * (len + 1));
int pos = 0;
for (int i = 0; i < strlen(text); i++) {
char* bits = codes[(int)text[i]]->bits;
memcpy(encoded + pos, bits, sizeof(char) * strlen(bits));
pos += strlen(bits);
}
encoded[len] = '\0';
return encoded;
}
// 译码文本
char* decode(char* encoded, Node* root) {
char* text = (char*)malloc(sizeof(char) * (strlen(encoded) + 1));
int pos = 0;
Node* node = root;
for (int i = 0; i < strlen(encoded); i++) {
if (encoded[i] == '0') {
node = node->left;
} else {
node = node->right;
}
if (node->left == NULL && node->right == NULL) {
text[pos++] = node->c;
node = root;
}
}
text[pos] = '\0';
return text;
}
// 测试代码
int main() {
char* text = "Hello, World!";
int freq[MAX_SIZE] = { 0 };
for (int i = 0; i < strlen(text); i++) {
freq[(int)text[i]]++;
}
Node* root = build_tree(freq);
Code** codes = get_codes(root);
char* encoded = encode(text, codes);
char* decoded = decode(encoded, root);
printf("Text: %s\n", text);
printf("Encoded: %s\n", encoded);
printf("Decoded: %s\n", decoded);
free(encoded);
free(decoded);
free_codes(codes);
return 0;
}
```
以上代码使用 C 语言实现了哈夫曼编码与解码器,可以将输入的文本进行编码,并将编码串进行解码。你只需要将文本传递给 `encode()` 函数,即可得到编码串,然后将编码串和哈夫曼树的根节点传递给 `decode()` 函数,即可得到原始文本。在代码中,我们使用一个数组 `freq` 来统计字符出现的频率,然后根据频率构建哈夫曼树,再根据哈夫曼树构建编码表,最后生成编码串。在 `decode()` 函数中,根据编码串和哈夫曼树对编码串进行解码。
阅读全文