有哈夫曼码表,如何对本地文本进行压缩C语言编程的代码
时间: 2023-11-22 22:54:49 浏览: 73
首先,需要将文本文件读入到程序中。然后,统计文本中各个字符的频率,并按照频率建立哈夫曼树。接下来,根据哈夫曼树生成哈夫曼编码表。最后,将文本内容进行哈夫曼编码,将编码后的二进制数据保存到文件中。
下面是一个简单的实现例子:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// 定义哈夫曼编码结构体
typedef struct HuffmanCode {
int freq; // 字符出现频率
char code[256]; // 哈夫曼编码
} HuffmanCode;
// 定义哈夫曼树节点结构体
typedef struct TreeNode {
int freq; // 字符出现频率
int symbol; // 字符
struct TreeNode *left; // 左子树
struct TreeNode *right; // 右子树
} TreeNode;
// 统计文本中各个字符的频率
int *calcFreq(FILE *fp, int *size) {
int *freq = (int *)calloc(256, sizeof(int));
if (freq == NULL) {
printf("error: memory allocation failed\n");
exit(EXIT_FAILURE);
}
int ch;
while ((ch = fgetc(fp)) != EOF) {
freq[ch]++;
}
*size = ftell(fp);
fseek(fp, 0, SEEK_SET);
return freq;
}
// 创建哈夫曼树
TreeNode *createHuffmanTree(int *freq) {
TreeNode **nodes = (TreeNode **)calloc(256, sizeof(TreeNode *));
if (nodes == NULL) {
printf("error: memory allocation failed\n");
exit(EXIT_FAILURE);
}
// 初始化节点
for (int i = 0; i < 256; i++) {
if (freq[i] > 0) {
nodes[i] = (TreeNode *)calloc(1, sizeof(TreeNode));
if (nodes[i] == NULL) {
printf("error: memory allocation failed\n");
exit(EXIT_FAILURE);
}
nodes[i]->freq = freq[i];
nodes[i]->symbol = i;
}
}
// 构建哈夫曼树
while (1) {
TreeNode *node1 = NULL;
TreeNode *node2 = NULL;
// 找到频率最小的两个节点
for (int i = 0; i < 256; i++) {
if (nodes[i] != NULL) {
if (node1 == NULL || nodes[i]->freq < node1->freq) {
node2 = node1;
node1 = nodes[i];
} else if (node2 == NULL || nodes[i]->freq < node2->freq) {
node2 = nodes[i];
}
}
}
if (node2 == NULL) {
break;
}
// 创建新节点
TreeNode *node = (TreeNode *)calloc(1, sizeof(TreeNode));
if (node == NULL) {
printf("error: memory allocation failed\n");
exit(EXIT_FAILURE);
}
node->freq = node1->freq + node2->freq;
node->left = node1;
node->right = node2;
nodes[node1->symbol] = node;
nodes[node2->symbol] = NULL;
}
// 返回根节点
return nodes[node1->symbol];
}
// 生成哈夫曼编码表
void createHuffmanCode(TreeNode *node, HuffmanCode *codes, char *code, int size) {
if (node->left == NULL && node->right == NULL) {
// 叶子节点,将哈夫曼编码赋值到编码表中
codes[node->symbol].freq = node->freq;
strcpy(codes[node->symbol].code, code);
} else {
// 非叶子节点,向左子树添加编码位 '0'
code[size] = '0';
createHuffmanCode(node->left, codes, code, size + 1);
// 向右子树添加编码位 '1'
code[size] = '1';
createHuffmanCode(node->right, codes, code, size + 1);
}
}
// 哈夫曼编码
void huffmanEncode(FILE *fp, FILE *outfp, HuffmanCode *codes) {
char *code = (char *)calloc(256, sizeof(char));
if (code == NULL) {
printf("error: memory allocation failed\n");
exit(EXIT_FAILURE);
}
int ch;
int pos = 0;
char buf = 0;
while ((ch = fgetc(fp)) != EOF) {
// 将哈夫曼编码拼接到缓冲区中
for (int i = 0; codes[ch].code[i] != '\0'; i++) {
if (codes[ch].code[i] == '0') {
buf = buf << 1;
} else {
buf = (buf << 1) | 1;
}
pos++;
if (pos == 8) {
// 缓冲区已满,写入文件中
fwrite(&buf, sizeof(char), 1, outfp);
pos = 0;
buf = 0;
}
}
}
// 写入剩余数据
if (pos > 0) {
buf = buf << (8 - pos);
fwrite(&buf, sizeof(char), 1, outfp);
}
}
int main(void) {
FILE *fp = fopen("test.txt", "rb");
if (fp == NULL) {
printf("error: file not found\n");
return EXIT_FAILURE;
}
int size;
int *freq = calcFreq(fp, &size);
TreeNode *root = createHuffmanTree(freq);
HuffmanCode *codes = (HuffmanCode *)calloc(256, sizeof(HuffmanCode));
if (codes == NULL) {
printf("error: memory allocation failed\n");
return EXIT_FAILURE;
}
char *code = (char *)calloc(256, sizeof(char));
if (code == NULL) {
printf("error: memory allocation failed\n");
return EXIT_FAILURE;
}
createHuffmanCode(root, codes, code, 0);
FILE *outfp = fopen("test.bin", "wb");
if (outfp == NULL) {
printf("error: file create failed\n");
return EXIT_FAILURE;
}
huffmanEncode(fp, outfp, codes);
fclose(outfp);
fclose(fp);
free(freq);
free(codes);
free(code);
return 0;
}
```
这个程序能够读取名为 `test.txt` 的文本文件,使用哈夫曼编码进行压缩,并将压缩后的二进制数据保存为名为 `test.bin` 的文件。
阅读全文