利用C语言编写一个完整代码。具体要求为:从键盘任意输入一个带空格由英文字母构成的文本字符串,例如“ilikelearningdatastructurecourse”,首先根据该字符串创建Huffman树,之后进行Huffman编码,然后把该字符串的Huffman编码结果向屏幕输出。能够利用前述的Huffman编码结果对某0-1码流进行解码得到字符串,并作出详细注释
时间: 2024-03-07 22:48:08 浏览: 72
以下是实现以上功能的完整代码,我会尽量详细注释,以方便您理解。
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* 定义二叉树节点结构体 */
typedef struct TreeNode {
char data; /* 节点存储的字符 */
int weight; /* 节点权重(出现次数) */
struct TreeNode *left; /* 左子节点 */
struct TreeNode *right; /* 右子节点 */
} TreeNode;
/* 定义哈夫曼树结构体 */
typedef struct HuffmanTree {
TreeNode *root; /* 根节点 */
} HuffmanTree;
/* 哈夫曼编码表结构体 */
typedef struct HuffmanCode {
char data; /* 字符 */
char *code; /* 编码 */
struct HuffmanCode *next; /* 指向下一个编码表节点的指针 */
} HuffmanCode;
/* 定义哈夫曼编码表链表结构体 */
typedef struct HuffmanCodeList {
HuffmanCode *head; /* 头节点指针 */
HuffmanCode *tail; /* 尾节点指针 */
} HuffmanCodeList;
/* 创建一个新的二叉树节点 */
TreeNode *newNode(char data, int weight) {
TreeNode *node = (TreeNode*)malloc(sizeof(TreeNode));
node->data = data;
node->weight = weight;
node->left = NULL;
node->right = NULL;
return node;
}
/* 按照节点权重从小到大排序的比较函数 */
int cmp(const void *a, const void *b) {
TreeNode **pa = (TreeNode**)a;
TreeNode **pb = (TreeNode**)b;
return (*pa)->weight - (*pb)->weight;
}
/* 从文本字符串中创建哈夫曼树 */
HuffmanTree *createHuffmanTree(char *str) {
int len = strlen(str);
int count[26] = {0}; /* 统计每个字母出现的次数 */
for (int i = 0; i < len; i++) {
if (isalpha(str[i])) {
count[str[i] - 'a']++;
}
}
/* 将每个出现过的字母作为一个节点,构成森林 */
TreeNode *forest[26];
int forestSize = 0;
for (int i = 0; i < 26; i++) {
if (count[i] > 0) {
forest[forestSize++] = newNode(i + 'a', count[i]);
}
}
/* 不断将森林中的节点合并,直到只剩下一棵树为止 */
while (forestSize > 1) {
/* 找到森林中权重最小的两个节点 */
qsort(forest, forestSize, sizeof(TreeNode*), cmp);
TreeNode *left = forest[0];
TreeNode *right = forest[1];
/* 创建一个新的节点,作为这两个节点的父节点 */
TreeNode *parent = newNode('\0', left->weight + right->weight);
parent->left = left;
parent->right = right;
/* 将这个新的节点加入森林中 */
forest[0] = parent;
for (int i = 1; i < forestSize - 1; i++) {
forest[i] = forest[i + 1];
}
forestSize--;
}
/* 最后剩下的这棵树即为哈夫曼树 */
HuffmanTree *tree = (HuffmanTree*)malloc(sizeof(HuffmanTree));
tree->root = forest[0];
return tree;
}
/* 递归生成哈夫曼编码表 */
void generateHuffmanCodeList(TreeNode *node, HuffmanCodeList *list, char *code, int depth) {
if (node == NULL) {
return;
}
/* 当前节点是叶子节点,将它的字符和编码加入编码表中 */
if (node->left == NULL && node->right == NULL) {
HuffmanCode *huffmanCode = (HuffmanCode*)malloc(sizeof(HuffmanCode));
huffmanCode->data = node->data;
huffmanCode->code = (char*)malloc(sizeof(char) * (depth + 1));
huffmanCode->code[depth] = '\0';
strncpy(huffmanCode->code, code, depth);
if (list->head == NULL) {
list->head = huffmanCode;
list->tail = huffmanCode;
} else {
list->tail->next = huffmanCode;
list->tail = huffmanCode;
}
}
/* 分别处理左子树和右子树 */
code[depth] = '0';
generateHuffmanCodeList(node->left, list, code, depth + 1);
code[depth] = '1';
generateHuffmanCodeList(node->right, list, code, depth + 1);
}
/* 生成哈夫曼编码表 */
HuffmanCodeList *generateHuffmanCode(HuffmanTree *tree) {
char code[26] = {0};
HuffmanCodeList *list = (HuffmanCodeList*)malloc(sizeof(HuffmanCodeList));
list->head = NULL;
list->tail = NULL;
generateHuffmanCodeList(tree->root, list, code, 0);
return list;
}
/* 根据哈夫曼编码表,将文本字符串编码为一个0-1串 */
char *encode(HuffmanCodeList *list, char *str) {
int len = strlen(str);
int codeLen = 0;
char *code = (char*)malloc(sizeof(char) * (len * 10)); /* 最多需要10倍的空间 */
code[0] = '\0';
for (int i = 0; i < len; i++) {
if (isalpha(str[i])) {
/* 在编码表中查找当前字符的编码 */
HuffmanCode *huffmanCode = list->head;
while (huffmanCode != NULL && huffmanCode->data != str[i]) {
huffmanCode = huffmanCode->next;
}
if (huffmanCode != NULL) {
/* 将编码拼接到0-1串中 */
strcat(code, huffmanCode->code);
codeLen += strlen(huffmanCode->code);
}
}
}
/* 将0-1串的长度写在字符串的前四个字节中 */
char *result = (char*)malloc(sizeof(char) * (codeLen / 8 + 5));
result[0] = codeLen >> 24 & 0xff;
result[1] = codeLen >> 16 & 0xff;
result[2] = codeLen >> 8 & 0xff;
result[3] = codeLen & 0xff;
for (int i = 0; i < codeLen; i += 8) {
char byte = 0;
for (int j = 0; j < 8; j++) {
byte <<= 1;
if (code[i + j] == '1') {
byte |= 1;
}
}
result[i / 8 + 4] = byte;
}
result[codeLen / 8 + 4] = '\0';
free(code);
return result;
}
/* 根据哈夫曼编码表,将0-1串解码为文本字符串 */
char *decode(HuffmanCodeList *list, char *code) {
int len = (code[0] << 24) | (code[1] << 16) | (code[2] << 8) | code[3];
char *str = (char*)malloc(sizeof(char) * (len + 1));
int pos = 4;
int strPos = 0;
while (pos < (len + 4) / 8 + 4) {
/* 将一个字节拆分为8个二进制位 */
char byte = code[pos++];
for (int i = 7; i >= 0 && strPos < len; i--) {
int bit = (byte >> i) & 1;
/* 在编码表中查找当前0-1子串对应的字符 */
HuffmanCode *huffmanCode = list->head;
while (huffmanCode != NULL && strlen(huffmanCode->code) <= strPos && strncmp(huffmanCode->code, str + strPos - strlen(huffmanCode->code), strlen(huffmanCode->code)) != 0) {
huffmanCode = huffmanCode->next;
}
if (huffmanCode != NULL && strlen(huffmanCode->code) > strPos && huffmanCode->code[strPos] == '0' + bit) {
/* 匹配成功 */
str[strPos++] = huffmanCode->data;
} else {
/* 匹配失败 */
i++;
}
}
}
str[len] = '\0';
return str;
}
int main() {
char str[1000];
printf("请输入一个带空格由英文字母构成的文本字符串:\n");
fgets(str, sizeof(str), stdin);
str[strlen(str) - 1] = '\0'; /* 去掉末尾的换行符 */
/* 创建哈夫曼树 */
HuffmanTree *tree = createHuffmanTree(str);
/* 生成哈夫曼编码表 */
HuffmanCodeList *list = generateHuffmanCode(tree);
/* 将文本字符串编码为0-1串 */
char *code = encode(list, str);
printf("编码结果:\n");
for (int i = 0; i < (strlen(code) + 7) / 8; i++) {
printf("%02x ", code[i] & 0xff);
}
printf("\n");
/* 将0-1串解码为文本字符串 */
char *decodedStr = decode(list, code);
printf("解码结果:%s\n", decodedStr);
/* 释放内存 */
TreeNode *node = tree->root;
free(tree);
while (node != NULL) {
TreeNode *next = node->left;
if (next == NULL) {
next = node->right;
free(node);
}
node = next;
}
HuffmanCode *huffmanCode = list->head;
while (huffmanCode != NULL) {
HuffmanCode *next = huffmanCode->next;
free(huffmanCode->code);
free(huffmanCode);
huffmanCode = next;
}
free(code);
free(decodedStr);
return 0;
}
```
以上代码是一个完整的实现,可以从键盘输入一个带空格由英文字母构成的文本字符串,创建对应的哈夫曼树,然后进行哈夫曼编码,并输出编码结果。同时,也实现了将哈夫曼编码结果解码为原始字符串的功能。
需要注意的是,由于哈夫曼编码结果是一个0-1串,所以在输出时需要进行一些转换。我在代码中采用了十六进制输出的方式,每个字节用两个十六进制数表示。在解码时,需要将0-1串中的每一个字节拆分成8个二进制位,并在编码表中查找对应的字符。如果匹配成功,则将该字符加入解码结果中;否则,将当前二进制位重新加入待匹配的子串中,继续匹配下一个二进制位。
阅读全文