设计实现一个基于哈夫曼算法的压缩软件,统计文件中各字符出现的频次,设计哈夫曼编码,实现文件的压缩及解压缩。将建立哈夫曼树、实现哈夫曼编码、哈夫曼译码都定义成子函数的形式,用c语言写出并写出详细注释
时间: 2023-06-12 20:07:55 浏览: 114
用哈夫曼编码C语言实现文件压缩-参考模板.pdf
5星 · 资源好评率100%
以下是基于哈夫曼算法的压缩软件的具体实现步骤:
1. 统计文件中各字符出现的频次
可以使用一个数组来记录每个字符出现的次数,遍历整个文件,对于每个读取到的字符,将其对应的计数器加一。
2. 构建哈夫曼树
首先将每个字符的出现频次作为权值,构建一个森林,每个节点为一个单独的树。接着按照权值从小到大的顺序选择两个树合并成一棵更大的树,直到最后只剩下一棵树,这棵树就是哈夫曼树。
3. 实现哈夫曼编码
从根节点出发,对于每个左子树的路径标记为 0,对于每个右子树的路径标记为 1,直到到达叶子节点,每个叶子节点对应一个字符的编码。将这些编码存储到一个编码表中,用于压缩文件时使用。
4. 实现文件压缩
首先将原始文件中的每个字符替换为其对应的哈夫曼编码,然后将编码后的文件写入到新的文件中。
5. 实现文件解压缩
对于压缩后的文件,读取其中的每个比特位,并从根节点开始,沿着哈夫曼树的路径寻找对应的字符,直到到达叶子节点。将找到的字符写入到解压缩后的文件中。
以下是具体的 C 语言实现代码:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_HT 100
// 定义哈夫曼树节点
struct MinHeapNode {
char data;
unsigned freq;
struct MinHeapNode *left, *right;
};
// 定义哈夫曼树堆
struct MinHeap {
unsigned size;
unsigned capacity;
struct MinHeapNode** array;
};
// 创建一个哈夫曼树节点
struct MinHeapNode* createNode(char data, unsigned freq) {
struct MinHeapNode* node = (struct MinHeapNode*) malloc(sizeof(struct MinHeapNode));
node->left = node->right = NULL;
node->data = data;
node->freq = freq;
return node;
}
// 创建一个空的哈夫曼树堆
struct MinHeap* createMinHeap(unsigned capacity) {
struct MinHeap* minHeap = (struct MinHeap*) malloc(sizeof(struct MinHeap));
minHeap->size = 0;
minHeap->capacity = capacity;
minHeap->array = (struct MinHeapNode**) malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
return minHeap;
}
// 交换两个节点的位置
void swap(struct MinHeapNode** a, struct MinHeapNode** b) {
struct MinHeapNode* t = *a;
*a = *b;
*b = t;
}
// 最小堆化
void minHeapify(struct MinHeap* minHeap, int idx) {
int smallest = idx;
int left = 2 * idx + 1;
int right = 2 * idx + 2;
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
smallest = left;
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
smallest = right;
if (smallest != idx) {
swap(&minHeap->array[smallest], &minHeap->array[idx]);
minHeapify(minHeap, smallest);
}
}
// 检查堆的大小是否为 1
int isSizeOne(struct MinHeap* minHeap) {
return (minHeap->size == 1);
}
// 抽取堆中最小的节点
struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
struct MinHeapNode* temp = minHeap->array[0];
minHeap->array[0] = minHeap->array[minHeap->size - 1];
--minHeap->size;
minHeapify(minHeap, 0);
return temp;
}
// 插入一个新的节点到堆中
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
++minHeap->size;
int i = minHeap->size - 1;
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
minHeap->array[i] = minHeap->array[(i - 1) / 2];
i = (i - 1) / 2;
}
minHeap->array[i] = minHeapNode;
}
// 构建哈夫曼树
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
struct MinHeapNode *left, *right, *top;
struct MinHeap* minHeap = createMinHeap(size);
for (int i = 0; i < size; ++i)
insertMinHeap(minHeap, createNode(data[i], freq[i]));
while (!isSizeOne(minHeap)) {
left = extractMin(minHeap);
right = extractMin(minHeap);
top = createNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
return extractMin(minHeap);
}
// 将编码写入到哈夫曼树编码表中
void storeCodes(struct MinHeapNode* root, int arr[], int top, int codes[][MAX_TREE_HT]) {
if (root->left) {
arr[top] = 0;
storeCodes(root->left, arr, top + 1, codes);
}
if (root->right) {
arr[top] = 1;
storeCodes(root->right, arr, top + 1, codes);
}
if (!root->left && !root->right) {
for (int i = 0; i < top; ++i)
codes[(int) root->data][i] = arr[i];
}
}
// 将字符串转换为比特流
void convertToBits(char* input, int* bits, int* size, int codes[][MAX_TREE_HT]) {
int len = strlen(input);
int bitIdx = 0;
for (int i = 0; i < len; ++i) {
int c = input[i];
int j = 0;
while (codes[c][j] != -1) {
bits[bitIdx++] = codes[c][j++];
}
}
*size = bitIdx;
}
// 将比特流转换为字符串
void convertToString(int* bits, int size, char* output, struct MinHeapNode* root) {
struct MinHeapNode* p = root;
int outIdx = 0;
for (int i = 0; i < size; ++i) {
if (bits[i] == 0)
p = p->left;
else
p = p->right;
if (!p->left && !p->right) {
output[outIdx++] = p->data;
p = root;
}
}
output[outIdx] = '\0';
}
// 压缩文件
void compress(char* inputFile, char* outputFile) {
FILE* fpIn = fopen(inputFile, "r");
FILE* fpOut = fopen(outputFile, "wb");
char data[MAX_TREE_HT], c;
int freq[MAX_TREE_HT] = {0}, size = 0;
int codes[MAX_TREE_HT][MAX_TREE_HT];
int arr[MAX_TREE_HT];
int bitSize = 0, bitIdx = 0;
int bits[MAX_TREE_HT * MAX_TREE_HT];
char buffer = 0;
while ((c = fgetc(fpIn)) != EOF) {
data[size++] = c;
++freq[(int) c];
}
struct MinHeapNode* root = buildHuffmanTree(data, freq, size);
memset(codes, -1, sizeof(codes));
storeCodes(root, arr, 0, codes);
rewind(fpIn);
while ((c = fgetc(fpIn)) != EOF) {
int j = 0;
while (codes[(int) c][j] != -1) {
if (bitIdx == 8) {
fwrite(&buffer, sizeof(buffer), 1, fpOut);
buffer = 0;
bitIdx = 0;
}
buffer <<= 1;
buffer |= codes[(int) c][j];
++bitIdx;
++j;
}
}
if (bitIdx > 0) {
buffer <<= (8 - bitIdx);
fwrite(&buffer, sizeof(buffer), 1, fpOut);
}
fclose(fpIn);
fclose(fpOut);
}
// 解压缩文件
void decompress(char* inputFile, char* outputFile) {
FILE* fpIn = fopen(inputFile, "rb");
FILE* fpOut = fopen(outputFile, "w");
char data[MAX_TREE_HT], c;
int freq[MAX_TREE_HT] = {0}, size = 0;
int codes[MAX_TREE_HT][MAX_TREE_HT];
int arr[MAX_TREE_HT];
int bitIdx = 0;
int bits[MAX_TREE_HT * MAX_TREE_HT];
char output[MAX_TREE_HT];
int outIdx = 0;
while (fread(&c, sizeof(c), 1, fpIn) != 0) {
int i = 0;
for (i = 7; i >= 0; --i) {
int bit = (c >> i) & 1;
bits[bitIdx++] = bit;
if (bitIdx % 8 == 0) {
convertToString(bits + outIdx, 8, output, root);
outIdx += strlen(output);
fwrite(output, sizeof(char), strlen(output), fpOut);
outIdx = 0;
}
}
}
if (bitIdx % 8 != 0) {
while (bitIdx % 8 != 0)
bits[bitIdx++] = 0;
convertToString(bits + outIdx, bitIdx - outIdx, output, root);
fwrite(output, sizeof(char), strlen(output), fpOut);
}
fclose(fpIn);
fclose(fpOut);
}
int main() {
char inputFile[] = "input.txt";
char compressedFile[] = "compressed.bin";
char decompressedFile[] = "decompressed.txt";
// 压缩文件
compress(inputFile, compressedFile);
// 解压缩文件
decompress(compressedFile, decompressedFile);
return 0;
}
```
阅读全文