用C语言编写,基于哈夫曼树的数据压缩算法,要求:输入一串字符串,根据给定的字符串中字符出现的频率建立相应哈夫曼树,构造哈夫曼编码表,在此基础上可以对待压缩文件进行压缩(即编码),同时可以对压缩后的二进制编码文件进行解压(即译码)。代码
时间: 2024-02-13 08:07:50 浏览: 102
好的,以下是基于哈夫曼树的数据压缩算法的C语言代码:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_HT 100
// 定义哈夫曼树节点结构体
struct MinHeapNode {
char data; // 字符
unsigned freq; // 频率
struct MinHeapNode *left, *right; // 左右子节点
};
// 定义哈夫曼编码表结构体
struct HuffmanCode {
char data; // 字符
char *code; // 编码
};
// 定义哈夫曼编码表数组
struct HuffmanCode huffmanCodes[MAX_TREE_HT];
// 定义哈夫曼树节点最小堆结构体
struct MinHeap {
unsigned size; // 堆大小
unsigned capacity; // 堆容量
struct MinHeapNode **array; // 堆数组
};
// 创建一个哈夫曼树节点
struct MinHeapNode* newNode(char data, unsigned freq) {
struct MinHeapNode* node = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode));
node->left = node->right = NULL;
node->data = data;
node->freq = freq;
return node;
}
// 创建一个哈夫曼树节点最小堆
struct MinHeap* createMinHeap(unsigned capacity) {
struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
minHeap->size = 0;
minHeap->capacity = capacity;
minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
return minHeap;
}
// 交换两个哈夫曼树节点
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
struct MinHeapNode* t = *a;
*a = *b;
*b = t;
}
// 维护最小堆的性质
void minHeapify(struct MinHeap* minHeap, int idx) {
int smallest = idx;
int left = 2 * idx + 1;
int right = 2 * idx + 2;
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
smallest = left;
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
smallest = right;
if (smallest != idx) {
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
minHeapify(minHeap, smallest);
}
}
// 判断最小堆中是否只剩下一个节点
int isSizeOne(struct MinHeap* minHeap) {
return (minHeap->size == 1);
}
// 从最小堆中取出频率最小的节点
struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
struct MinHeapNode* temp = minHeap->array[0];
minHeap->array[0] = minHeap->array[minHeap->size - 1];
--minHeap->size;
minHeapify(minHeap, 0);
return temp;
}
// 插入一个节点到最小堆
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
++minHeap->size;
int i = minHeap->size - 1;
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
minHeap->array[i] = minHeap->array[(i - 1) / 2];
i = (i - 1) / 2;
}
minHeap->array[i] = minHeapNode;
}
// 构建哈夫曼树
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
struct MinHeapNode *left, *right, *top;
struct MinHeap* minHeap = createMinHeap(size);
for (int i = 0; i < size; ++i)
insertMinHeap(minHeap, newNode(data[i], freq[i]));
while (!isSizeOne(minHeap)) {
left = extractMin(minHeap);
right = extractMin(minHeap);
top = newNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
return extractMin(minHeap);
}
// 将哈夫曼编码存储到数组中
void storeCodes(struct MinHeapNode* root, char* str, int top) {
if (root->left) {
str[top] = '0';
storeCodes(root->left, str, top + 1);
}
if (root->right) {
str[top] = '1';
storeCodes(root->right, str, top + 1);
}
if (!root->left && !root->right) {
huffmanCodes[root->data].data = root->data;
huffmanCodes[root->data].code = (char*)malloc(sizeof(char) * (top + 1));
memcpy(huffmanCodes[root->data].code, str, top + 1);
}
}
// 构造哈夫曼编码表
void buildHuffmanCodes(struct MinHeapNode* root) {
char str[MAX_TREE_HT];
storeCodes(root, str, 0);
}
// 将字符串进行哈夫曼编码
void encodeString(char* str, FILE* fp) {
int i = 0;
while (str[i]) {
fwrite(huffmanCodes[str[i]].code, strlen(huffmanCodes[str[i]].code), 1, fp);
++i;
}
}
// 将二进制编码还原成原始字符串
void decodeString(struct MinHeapNode* root, char* code, FILE* fp) {
struct MinHeapNode* curr = root;
int i = 0;
while (code[i]) {
if (code[i] == '0')
curr = curr->left;
else
curr = curr->right;
if (!curr->left && !curr->right) {
fwrite(&curr->data, sizeof(curr->data), 1, fp);
curr = root;
}
++i;
}
}
// 主函数
int main() {
char data[] = "Huffman coding is a data compression algorithm.";
int freq[] = { 3, 5, 1, 1, 2, 3, 2, 1, 4, 1, 2, 2, 1, 5, 1, 3, 2, 2, 3, 2, 1, 1, 1, 1, 1, 1 };
int size = sizeof(data) / sizeof(data[0]);
// 构建哈夫曼树
struct MinHeapNode* root = buildHuffmanTree(data, freq, size);
// 构造哈夫曼编码表
buildHuffmanCodes(root);
// 对字符串进行哈夫曼编码并写入文件
FILE* fp = fopen("compressed.bin", "wb");
encodeString(data, fp);
fclose(fp);
// 从文件中读取哈夫曼编码并还原成原始字符串
fp = fopen("compressed.bin", "rb");
fseek(fp, 0L, SEEK_END);
int sizeOfFile = ftell(fp);
char* code = (char*)malloc(sizeOfFile + 1);
fseek(fp, 0L, SEEK_SET);
fread(code, sizeOfFile, 1, fp);
code[sizeOfFile] = '\0';
fclose(fp);
fp = fopen("decompressed.txt", "w");
decodeString(root, code, fp);
fclose(fp);
return 0;
}
```
该代码可以对给定字符串进行哈夫曼编码,并将编码后的二进制数据写入文件。同时,该代码还可以从文件中读取哈夫曼编码并还原成原始字符串。
阅读全文