输入一段100—200字的英文短文,存入一文件a中,编写一段C语言代码实现写函数统计短文出现的字母个数n及每个字母的出现次数,写函数以字母出现次数作权值,建Haffman树(n个叶子),给出每个字母的Haffman编码,用每个字母编码对原短文进行编码,码文存入文件b中,用Haffman树对b中码文进行译码,结果存入文件c中,比较a,c是否一致,以检验编码、译码的正确性。
时间: 2023-12-10 17:37:01 浏览: 126
哈夫曼树编码和译码,C语言实现
英文短文:
The quick brown fox jumps over the lazy dog. This sentence contains all 26 letters of the English alphabet. It is often used to test typewriters and computer keyboards, as well as to demonstrate fonts and printing types.
C语言代码:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LETTERS 26
// 结构体定义
typedef struct haffman_node{
char letter;
int freq;
char *code;
struct haffman_node *left;
struct haffman_node *right;
} HaffmanNode;
// 函数声明
void count_letters(char *filename, int *n, int freq[]);
void sort_nodes(HaffmanNode *nodes[], int size);
HaffmanNode* build_haffman_tree(HaffmanNode *nodes[], int size);
void generate_codes(HaffmanNode *root);
void encode_text(char *filename, HaffmanNode *nodes[]);
void decode_text(char *filename, HaffmanNode *root);
int main()
{
int n = 0;
int freq[MAX_LETTERS] = {0};
HaffmanNode *nodes[MAX_LETTERS];
count_letters("a", &n, freq);
// 初始化Haffman树节点
for (int i = 0; i < MAX_LETTERS; i++) {
nodes[i] = (HaffmanNode*)malloc(sizeof(HaffmanNode));
nodes[i]->letter = i + 'a';
nodes[i]->freq = freq[i];
nodes[i]->code = NULL;
nodes[i]->left = NULL;
nodes[i]->right = NULL;
}
sort_nodes(nodes, MAX_LETTERS);
HaffmanNode *root = build_haffman_tree(nodes, MAX_LETTERS);
generate_codes(root);
encode_text("a", nodes);
decode_text("b", root);
// 检验编码和译码是否正确
FILE *fa = fopen("a", "r");
FILE *fc = fopen("c", "r");
char ch_a, ch_c;
while ((ch_a = fgetc(fa)) != EOF && (ch_c = fgetc(fc)) != EOF) {
if (ch_a != ch_c) {
printf("编码或译码错误!\n");
break;
}
}
if (ch_a == EOF && ch_c == EOF) {
printf("编码和译码正确!\n");
}
// 释放动态分配的内存
for (int i = 0; i < MAX_LETTERS; i++) {
free(nodes[i]->code);
free(nodes[i]);
}
return 0;
}
// 统计字母个数和频率
void count_letters(char *filename, int *n, int freq[])
{
FILE *fp = fopen(filename, "r");
char ch;
while ((ch = fgetc(fp)) != EOF) {
if (ch >= 'a' && ch <= 'z') {
freq[ch - 'a']++;
(*n)++;
}
}
fclose(fp);
}
// 对Haffman树节点按频率排序
void sort_nodes(HaffmanNode *nodes[], int size)
{
for (int i = 0; i < size - 1; i++) {
for (int j = i + 1; j < size; j++) {
if (nodes[i]->freq > nodes[j]->freq) {
HaffmanNode *temp = nodes[i];
nodes[i] = nodes[j];
nodes[j] = temp;
}
}
}
}
// 构建Haffman树
HaffmanNode* build_haffman_tree(HaffmanNode *nodes[], int size)
{
while (size > 1) {
HaffmanNode *new_node = (HaffmanNode*)malloc(sizeof(HaffmanNode));
new_node->letter = '\0';
new_node->freq = nodes[0]->freq + nodes[1]->freq;
new_node->code = NULL;
new_node->left = nodes[0];
new_node->right = nodes[1];
nodes[0] = new_node;
for (int i = 1; i < size - 1; i++) {
nodes[i] = nodes[i+1];
}
size--;
sort_nodes(nodes, size);
}
return nodes[0];
}
// 生成Haffman编码
void generate_codes(HaffmanNode *root)
{
if (root == NULL) {
return;
}
if (root->left != NULL) {
root->left->code = (char*)malloc(strlen(root->code) + 2);
strcpy(root->left->code, root->code);
strcat(root->left->code, "0");
generate_codes(root->left);
}
if (root->right != NULL) {
root->right->code = (char*)malloc(strlen(root->code) + 2);
strcpy(root->right->code, root->code);
strcat(root->right->code, "1");
generate_codes(root->right);
}
}
// 对短文进行编码
void encode_text(char *filename, HaffmanNode *nodes[])
{
FILE *fp = fopen(filename, "r");
FILE *fout = fopen("b", "w");
char ch;
while ((ch = fgetc(fp)) != EOF) {
if (ch >= 'a' && ch <= 'z') {
fputs(nodes[ch-'a']->code, fout);
}
}
fclose(fp);
fclose(fout);
}
// 译码
void decode_text(char *filename, HaffmanNode *root)
{
FILE *fin = fopen("b", "r");
FILE *fout = fopen(filename, "w");
HaffmanNode *node = root;
char ch;
while ((ch = fgetc(fin)) != EOF) {
if (ch == '0') {
node = node->left;
} else if (ch == '1') {
node = node->right;
}
if (node->left == NULL && node->right == NULL) {
fputc(node->letter, fout);
node = root;
}
}
fclose(fin);
fclose(fout);
}
```
代码解释:
1. `count_letters()`函数统计短文出现的字母个数和频率;
2. `sort_nodes()`函数对Haffman树节点按频率从小到大排序;
3. `build_haffman_tree()`函数构建Haffman树;
4. `generate_codes()`函数生成每个字母的Haffman编码;
5. `encode_text()`函数对短文进行编码;
6. `decode_text()`函数对编码后的短文进行译码;
7. 在`main()`函数中调用上述函数,最后比较原短文和译码后的短文是否一致,以检验编码和译码的正确性;
8. 释放动态分配的内存。
注意事项:
1. 代码中使用了动态内存分配,需要谨慎处理,避免内存泄漏;
2. 代码中对于输入的短文只考虑了小写字母,如果需要支持大写字母或其他字符,需要做相应的修改;
3. 代码中生成的Haffman树是用数组实现的,不是最优的实现方式,如果需要实现更高效的Haffman树构建算法,可以参考其他资料。
阅读全文