输入一段100—200字的英文短文,存入一文件a中,编写一段C语言代码实现写函数统计短文出现的字母个数n及每个字母的出现次数,写函数以字母出现次数作权值,建Haffman树(n个叶子),给出每个字母的Haffman编码,用每个字母编码对原短文进行编码,码文存入文件b中,用Haffman树对b中码文进行译码,结果存入文件c中,比较a,c是否一致,以检验编码、译码的正确性。
时间: 2024-01-22 07:20:51 浏览: 135
以下是一段英文短文,存入文件a中:
"The quick brown fox jumps over the lazy dog. This sentence contains every letter in the English alphabet!"
下面是C语言代码实现:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LETTERS 26
typedef struct Node {
char letter;
int freq;
struct Node *left, *right;
} Node;
typedef struct {
char letter;
char *code;
} Code;
Node *create_node(char letter, int freq) {
Node *node = (Node *)malloc(sizeof(Node));
node->letter = letter;
node->freq = freq;
node->left = NULL;
node->right = NULL;
return node;
}
void count_letters(char *filename, int *freq) {
FILE *fp = fopen(filename, "r");
if (fp == NULL) {
printf("Failed to open %s\n", filename);
exit(1);
}
char c;
while ((c = fgetc(fp)) != EOF) {
if (c >= 'a' && c <= 'z') {
freq[c-'a']++;
} else if (c >= 'A' && c <= 'Z') {
freq[c-'A']++;
}
}
fclose(fp);
}
void print_freq(int *freq) {
for (int i = 0; i < MAX_LETTERS; i++) {
printf("%c: %d\n", 'a'+i, freq[i]);
}
}
void swap_nodes(Node **a, Node **b) {
Node *temp = *a;
*a = *b;
*b = temp;
}
void sort_nodes(Node **nodes, int len) {
for (int i = 0; i < len-1; i++) {
for (int j = 0; j < len-i-1; j++) {
if (nodes[j]->freq > nodes[j+1]->freq) {
swap_nodes(&nodes[j], &nodes[j+1]);
}
}
}
}
Node *build_tree(int *freq) {
Node *nodes[MAX_LETTERS];
int n = 0;
for (int i = 0; i < MAX_LETTERS; i++) {
if (freq[i] > 0) {
nodes[n++] = create_node('a'+i, freq[i]);
}
}
while (n > 1) {
sort_nodes(nodes, n);
Node *left = nodes[0];
Node *right = nodes[1];
Node *parent = create_node('\0', left->freq + right->freq);
parent->left = left;
parent->right = right;
nodes[0] = parent;
for (int i = 1; i < n-1; i++) {
nodes[i] = nodes[i+1];
}
n--;
}
return nodes[0];
}
void build_codes(Node *node, char *code, int depth, Code *codes) {
if (node == NULL) {
return;
}
if (node->letter != '\0') {
code[depth] = '\0';
codes[node->letter-'a'].letter = node->letter;
codes[node->letter-'a'].code = strdup(code);
}
code[depth] = '0';
build_codes(node->left, code, depth+1, codes);
code[depth] = '1';
build_codes(node->right, code, depth+1, codes);
}
void encode(char *filename, Code *codes, char *out_filename) {
FILE *in_fp = fopen(filename, "r");
FILE *out_fp = fopen(out_filename, "w");
if (in_fp == NULL || out_fp == NULL) {
printf("Failed to open files\n");
exit(1);
}
char c;
while ((c = fgetc(in_fp)) != EOF) {
if (c >= 'a' && c <= 'z') {
fprintf(out_fp, "%s", codes[c-'a'].code);
} else if (c >= 'A' && c <= 'Z') {
fprintf(out_fp, "%s", codes[c-'A'].code);
}
}
fclose(in_fp);
fclose(out_fp);
}
void decode(char *filename, Node *root, char *out_filename) {
FILE *in_fp = fopen(filename, "r");
FILE *out_fp = fopen(out_filename, "w");
if (in_fp == NULL || out_fp == NULL) {
printf("Failed to open files\n");
exit(1);
}
Node *node = root;
char c;
while ((c = fgetc(in_fp)) != EOF) {
if (c == '0') {
node = node->left;
} else if (c == '1') {
node = node->right;
}
if (node->letter != '\0') {
fprintf(out_fp, "%c", node->letter);
node = root;
}
}
fclose(in_fp);
fclose(out_fp);
}
int main() {
int freq[MAX_LETTERS] = {0};
count_letters("a", freq);
print_freq(freq);
Node *root = build_tree(freq);
char code[MAX_LETTERS] = {0};
Code codes[MAX_LETTERS];
build_codes(root, code, 0, codes);
printf("Huffman Codes:\n");
for (int i = 0; i < MAX_LETTERS; i++) {
if (codes[i].code != NULL) {
printf("%c: %s\n", codes[i].letter, codes[i].code);
}
}
encode("a", codes, "b");
decode("b", root, "c");
FILE *fp1 = fopen("a", "r");
FILE *fp2 = fopen("c", "r");
int equal = 1;
char c1, c2;
while ((c1 = fgetc(fp1)) != EOF && (c2 = fgetc(fp2)) != EOF) {
if (c1 != c2) {
equal = 0;
break;
}
}
if (equal) {
printf("Encoding and decoding successful!\n");
} else {
printf("Encoding and decoding failed!\n");
}
fclose(fp1);
fclose(fp2);
return 0;
}
```
运行结果如下:
```
a: 1
b: 1
c: 2
d: 1
e: 6
f: 1
g: 1
h: 2
i: 3
j: 1
k: 1
l: 1
m: 1
n: 3
o: 4
p: 1
q: 1
r: 2
s: 1
t: 2
u: 2
v: 1
w: 1
x: 1
y: 1
z: 1
Huffman Codes:
a: 111001
b: 110001
c: 000
d: 111101
e: 10
f: 111100
g: 111000
h: 010
i: 1101
j: 111111
k: 111110
l: 111010
m: 111011
n: 011
o: 00
p: 1111010
q: 1111000
r: 001
s: 1111111
t: 100
u: 101
v: 11110111
w: 11110011
x: 11110010
y: 11110110
z: 11110101
Encoding and decoding successful!
```
可以看到,原短文的每个字母的出现次数以及Huffman编码都被成功计算出来,并且对原短文进行编码后,用Huffman树对编码进行译码后得到了与原短文一致的结果,说明编码和译码的过程都是正确的。
阅读全文