C语言 实现哈夫曼编码
时间: 2023-01-11 13:55:36 浏览: 131
好的,我来给你讲解如何用 C 语言实现哈夫曼编码。
哈夫曼编码是一种用于数据压缩的编码方法,它能够有效地减少数据的大小。哈夫曼编码的基本思想是:给出一个字符串,根据字符串中每个字符的出现频率,构建一棵二叉树,然后为每个字符分配一个二进制编码,使得字符串中出现频率较高的字符的编码较短,这样就能有效地减少数据的大小。
下面是一个简单的 C 语言实现的哈夫曼编码的例子:
```
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_SIZE 100
/*
 * Huffman tree stored as parallel arrays: entry i of every array describes
 * node i. Each array has room for MAX_TREE_SIZE nodes.
 * NOTE(review): the code that fills and reads this structure is not part of
 * this excerpt, so the field meanings below follow the original comments.
 */
typedef struct {
// Character associated with each node
char ch[MAX_TREE_SIZE];
// Weight of each node
int weight[MAX_TREE_SIZE];
// Left child of each node (presumably an index into these arrays - confirm)
int left[MAX_TREE_SIZE];
// Right child of each node (presumably an index into these arrays - confirm)
int right[MAX_TREE_SIZE];
// Number of nodes
int n;
} HuffmanTree;
// A single node of the Huffman tree; children and parent are referred to
// by index rather than by pointer.
typedef struct {
// Weight of the node
int weight;
// Index of the left child
int left;
// Index of the right child
int right;
// Index of the parent node
int parent;
} HTNode;
```
相关问题
用C语言实现哈夫曼编码
以下是使用C语言实现哈夫曼编码的基本步骤:
1. 定义结构体来表示每个字符及其对应的频率和编码:
```
/*
 * One node of the Huffman tree.
 *
 * Leaves carry a symbol (ch) and, once codes have been generated, the
 * '0'/'1' string assigned to that symbol (code). Internal nodes only use
 * freq and the two child links. The child links are required by the tree
 * building, code generation and decoding routines, which all follow
 * n->left / n->right.
 *
 * The new members are appended at the end so that positional initialisers
 * written for the original three fields keep their meaning.
 */
struct node {
    char ch;             /* symbol stored at a leaf */
    int freq;            /* number of occurrences (sum of children for internal nodes) */
    char *code;          /* heap-allocated code string, NULL until generated */
    struct node *left;   /* child reached by a '0' bit, NULL at a leaf */
    struct node *right;  /* child reached by a '1' bit, NULL at a leaf */
};
```
2. 统计每个字符出现的频率,并将其存储到一个数组中:
```
int freq[256] = {0};
for (int i = 0; i < strlen(input); i++) {
freq[input[i]]++;
}
```
3. 构建哈夫曼树,可以使用优先队列(通常用堆实现)来实现。注意:下面的示例使用了 C++ 标准库的 priority_queue、vector 和 new,并不是纯 C 代码:
```
/*
 * Build a Huffman tree from a table of 256 byte frequencies.
 *
 * NOTE: this snippet is C++, not C. It relies on std::priority_queue,
 * std::vector and operator new, and on a `compare` functor defined by the
 * caller that orders nodes by ascending frequency.
 *
 * freq    frequency of each byte value; entries <= 0 are ignored
 * returns the root of the tree, or NULL when no byte has a positive
 *         frequency (the original called top() on an empty queue, which
 *         is undefined behaviour)
 *
 * Every node is fully initialised: leaves get their symbol, internal
 * nodes get ch = '\0', and code starts as NULL on all nodes so later
 * stages can tell whether a code has been assigned.
 */
struct node *buildHuffmanTree(int freq[]) {
    priority_queue<struct node*, vector<struct node*>, compare> pq;

    /* One leaf per symbol that actually occurs. */
    for (int i = 0; i < 256; i++) {
        if (freq[i] <= 0) {
            continue;
        }
        struct node *leaf = new node;
        leaf->ch = (char)i;
        leaf->freq = freq[i];
        leaf->code = NULL;
        leaf->left = NULL;
        leaf->right = NULL;
        pq.push(leaf);
    }

    if (pq.empty()) {
        return NULL;
    }

    /* Repeatedly merge the two least frequent subtrees. */
    while (pq.size() > 1) {
        struct node *first = pq.top();
        pq.pop();
        struct node *second = pq.top();
        pq.pop();

        struct node *parent = new node;
        parent->ch = '\0';
        parent->code = NULL;
        parent->freq = first->freq + second->freq;
        parent->left = first;
        parent->right = second;
        pq.push(parent);
    }
    return pq.top();
}
```
其中,compare是一个比较函数,用于将节点按照频率从小到大排序。
4. 通过递归遍历哈夫曼树,生成每个字符的编码:
```
/*
 * Walk the Huffman tree and store a '0'/'1' code string in every leaf.
 *
 * root   subtree to process; NULL is accepted and ignored, so an empty
 *        tree or a node with a single child no longer dereferences NULL
 * code   scratch buffer owned by the caller; it must have room for the
 *        longest root-to-leaf path plus the terminating '\0'
 * index  number of characters of `code` already filled in (0 at the root)
 *
 * Each leaf receives its own copy of the path made with strdup(); the
 * caller owns these strings and must free() them. root->code is NULL if
 * strdup() fails.
 *
 * NOTE: a tree that consists of one leaf only gets the empty string as
 * its code, because no edge is traversed to reach it.
 */
void generateCodes(struct node *root, char *code, int index) {
    if (root == NULL) {
        return;
    }
    if (root->left == NULL && root->right == NULL) {
        code[index] = '\0';
        root->code = strdup(code);
        return;
    }
    /* Left edge contributes a '0', right edge a '1'. */
    code[index] = '0';
    generateCodes(root->left, code, index + 1);
    code[index] = '1';
    generateCodes(root->right, code, index + 1);
}
```
5. 使用生成的编码对原始数据进行编码:
```
/*
 * Encode a NUL-terminated string as a string of '0'/'1' characters using
 * the codes stored in the Huffman tree.
 *
 * input   text to encode
 * root    tree whose leaves already carry their code strings
 * returns a newly allocated NUL-terminated string that the caller must
 *         free(), or NULL if a character has no node/code in the tree or
 *         the allocation fails
 *
 * Fixes over the original: the output was passed to strcat() without ever
 * being initialised, the buffer had no room for the terminator, and its
 * size assumed that no code is longer than 8 bits. The exact length is
 * now computed first. Each distinct symbol is looked up only once, and
 * the codes are appended with memcpy() instead of repeated strcat() scans.
 */
char *encodeData(char *input, struct node *root) {
    const char *table[256] = {0};  /* code per byte value, filled on first use */
    size_t input_len = strlen(input);
    size_t total = 0;

    /* Pass 1: resolve the codes and add up the exact output length. */
    for (size_t i = 0; i < input_len; i++) {
        unsigned char sym = (unsigned char)input[i];
        if (table[sym] == NULL) {
            struct node *n = findNode(root, input[i]);
            if (n == NULL || n->code == NULL) {
                return NULL;
            }
            table[sym] = n->code;
        }
        total += strlen(table[sym]);
    }

    char *output = (char*)malloc(total + 1);
    if (output == NULL) {
        return NULL;
    }

    /* Pass 2: concatenate the codes. */
    size_t pos = 0;
    for (size_t i = 0; i < input_len; i++) {
        const char *code = table[(unsigned char)input[i]];
        size_t len = strlen(code);
        memcpy(output + pos, code, len);
        pos += len;
    }
    output[pos] = '\0';
    return output;
}
```
其中,findNode函数用于在哈夫曼树中查找指定字符对应的节点。
6. 使用生成的编码对压缩后的数据进行解码:
```
/*
 * Decode a string of '0'/'1' characters back into text by walking the
 * Huffman tree: '0' follows the left child, any other character follows
 * the right child, and reaching a leaf emits its symbol and restarts at
 * the root.
 *
 * input   NUL-terminated bit string
 * root    root of the Huffman tree
 * returns a newly allocated NUL-terminated string that the caller must
 *         free(), or NULL if root is NULL, the allocation fails, or the
 *         bits lead to a missing child (malformed input, or a tree made
 *         of a single leaf)
 *
 * The buffer holds strlen(input) + 1 bytes: at most one symbol is emitted
 * per input bit, plus the terminator the original had no room for.
 * Trailing bits that do not complete a code are ignored.
 */
char *decodeData(char *input, struct node *root) {
    if (root == NULL) {
        return NULL;
    }

    size_t input_len = strlen(input);
    char *output = (char*)malloc(input_len + 1);
    if (output == NULL) {
        return NULL;
    }

    size_t index = 0;
    struct node *n = root;
    for (size_t i = 0; i < input_len; i++) {
        n = (input[i] == '0') ? n->left : n->right;
        if (n == NULL) {
            /* The bit sequence does not correspond to a path in the tree. */
            free(output);
            return NULL;
        }
        if (n->left == NULL && n->right == NULL) {
            output[index++] = n->ch;
            n = root;
        }
    }
    output[index] = '\0';
    return output;
}
```
以上就是使用C语言实现哈夫曼编码的基本步骤。
c语言实现哈夫曼编码压缩
好的,实现哈夫曼编码压缩的步骤如下:
1. 统计字符频率:遍历待压缩的文件,统计每个字符出现的次数,将结果存储在一个频率表中。
2. 构建哈夫曼树:根据字符频率表构建哈夫曼树,构建过程中需要用到最小堆。
3. 生成编码表:根据哈夫曼树,生成每个字符对应的哈夫曼编码,将结果存储在一个编码表中。
4. 压缩文件:遍历待压缩的文件,将每个字符替换为其对应的哈夫曼编码,重新写入一个压缩后的文件中。
下面是一个简单的C语言实现:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_HT 100
/* A node of the Huffman tree; the same nodes are also the elements of the min-heap. */
struct MinHeapNode {
/* Character stored in the node (internal nodes are created with '$'). */
char data;
/* Frequency of the character; for an internal node, the sum of its children. */
unsigned freq;
/* Left and right children; both are NULL for a freshly created node. */
struct MinHeapNode *left, *right;
};
/* Binary min-heap of tree nodes, ordered by node frequency. */
struct MinHeap {
/* Number of nodes currently stored in the heap. */
unsigned size;
/* Number of slots allocated for array. */
unsigned capacity;
/* Array of node pointers holding the heap in implicit binary-tree layout. */
struct MinHeapNode** array;
};
/*
 * Allocate a tree node holding `data` with frequency `freq` and no
 * children.
 *
 * Returns the new node, or NULL if memory could not be allocated (the
 * original wrote through the unchecked malloc() result). The caller owns
 * the node and releases it with free().
 */
struct MinHeapNode* newNode(char data, unsigned freq)
{
    struct MinHeapNode* node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node->data = data;
    node->freq = freq;
    node->left = NULL;
    node->right = NULL;
    return node;
}
/*
 * Create an empty min-heap with room for `capacity` node pointers.
 *
 * Returns the heap, or NULL if either allocation fails; nothing is leaked
 * in that case. calloc() is used for the slot array so the element count
 * times element size cannot overflow silently. A capacity of 0 is allowed
 * and yields a heap whose array may be NULL.
 *
 * The caller owns the heap and releases it with free(heap->array)
 * followed by free(heap).
 */
struct MinHeap* createMinHeap(unsigned capacity)
{
    struct MinHeap* heap = malloc(sizeof *heap);
    if (heap == NULL) {
        return NULL;
    }
    heap->size = 0;
    heap->capacity = capacity;
    heap->array = calloc(capacity, sizeof *heap->array);
    if (heap->array == NULL && capacity != 0) {
        free(heap);
        return NULL;
    }
    return heap;
}
/* Exchange the two node pointers that `a` and `b` point to. */
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b)
{
    struct MinHeapNode* held = *b;

    *b = *a;
    *a = held;
}
/*
 * Restore the min-heap property for the subtree rooted at index `idx`,
 * assuming both child subtrees are already valid heaps. The node is
 * moved down until neither child has a smaller frequency.
 */
void minHeapify(struct MinHeap* minHeap, int idx)
{
    for (;;) {
        int lchild = 2 * idx + 1;
        int rchild = 2 * idx + 2;
        int best = idx;

        if (lchild < minHeap->size &&
            minHeap->array[lchild]->freq < minHeap->array[best]->freq) {
            best = lchild;
        }
        if (rchild < minHeap->size &&
            minHeap->array[rchild]->freq < minHeap->array[best]->freq) {
            best = rchild;
        }
        if (best == idx) {
            return;
        }

        swapMinHeapNode(&minHeap->array[best], &minHeap->array[idx]);
        idx = best;   /* continue sifting from the child we swapped with */
    }
}
/* Return 1 when the heap holds exactly one node, 0 otherwise. */
int isSizeOne(struct MinHeap* minHeap)
{
    if (minHeap->size == 1) {
        return 1;
    }
    return 0;
}
/*
 * Remove and return the node with the smallest frequency.
 *
 * Returns NULL when the heap is NULL or empty. The original read
 * array[size - 1] with size == 0 and then decremented the unsigned size,
 * wrapping it around to a huge value.
 */
struct MinHeapNode* extractMin(struct MinHeap* minHeap)
{
    if (minHeap == NULL || minHeap->size == 0) {
        return NULL;
    }

    struct MinHeapNode* smallest = minHeap->array[0];

    /* Move the last node to the root, shrink, then sift it down. */
    minHeap->array[0] = minHeap->array[minHeap->size - 1];
    --minHeap->size;
    minHeapify(minHeap, 0);
    return smallest;
}
/*
 * Add `minHeapNode` to the heap. The heap grows by one slot and parents
 * with a larger frequency are shifted down into the hole until the
 * correct position for the new node is found. The caller must make sure
 * there is spare capacity.
 */
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode)
{
    int hole = minHeap->size;   /* index of the newly added last slot */

    minHeap->size += 1;
    while (hole != 0) {
        int parent = (hole - 1) / 2;

        if (minHeapNode->freq >= minHeap->array[parent]->freq) {
            break;
        }
        minHeap->array[hole] = minHeap->array[parent];
        hole = parent;
    }
    minHeap->array[hole] = minHeapNode;
}
/*
 * Turn the first `size` entries of the heap array into a valid min-heap
 * by sifting down every node from the last parent back to the root.
 */
void buildMinHeap(struct MinHeap* minHeap)
{
    int last = minHeap->size - 1;
    int parent = (last - 1) / 2;

    while (parent >= 0) {
        minHeapify(minHeap, parent);
        parent--;
    }
}
/* Print the first `n` integers of `arr` back to back, followed by a newline. */
void printArr(int arr[], int n)
{
    int pos = 0;

    while (pos < n) {
        printf("%d", arr[pos]);
        pos++;
    }
    printf("\n");
}
/* Return 1 when the node has neither a left nor a right child, 0 otherwise. */
int isLeaf(struct MinHeapNode* root)
{
    return root->left == NULL && root->right == NULL;
}
/*
 * Create a min-heap of capacity `size` that contains one node per
 * (data[i], freq[i]) pair, then heapify it.
 */
struct MinHeap* createAndBuildMinHeap(char data[], int freq[], int size)
{
    struct MinHeap* heap = createMinHeap(size);
    int filled = 0;

    while (filled < size) {
        heap->array[filled] = newNode(data[filled], freq[filled]);
        filled++;
    }
    heap->size = size;

    buildMinHeap(heap);
    return heap;
}
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size)
{
struct MinHeapNode *left, *right, *top;
struct MinHeap* minHeap = createAndBuildMinHeap(data, freq, size);
while (!isSizeOne(minHeap)) {
left = extractMin(minHeap);
right = extractMin(minHeap);
top = newNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
return extractMin(minHeap);
}
/*
 * Print the code of every leaf below `root`. `arr` holds the bits of the
 * path taken so far (0 for a left edge, 1 for a right edge) and `top` is
 * the number of bits currently stored in it.
 */
void printCodes(struct MinHeapNode* root, int arr[], int top)
{
    /* A leaf has no children to descend into: print its symbol and path. */
    if (isLeaf(root)) {
        printf("%c: ", root->data);
        printArr(arr, top);
        return;
    }

    if (root->left != NULL) {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }
    if (root->right != NULL) {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }
}
void HuffmanCodes(char data[], int freq[], int size)
{
struct MinHeapNode* root
= buildHuffmanTree(data, freq, size);
int arr[MAX_TREE_HT], top = 0;
printCodes(root, arr, top);
}
void compress(char* input, char* output)
{
FILE* fp1 = fopen(input, "r");
if (fp1 == NULL) {
printf("File Not Found!\n");
return;
}
FILE* fp2 = fopen(output, "w");
char ch;
int freq[256] = { 0 };
while ((ch = fgetc(fp1)) != EOF) {
freq[ch]++;
}
char data[256] = { 0 };
int j = 0;
for (int i = 0; i < 256; i++) {
if (freq[i] != 0) {
data[j] = (char)i;
j++;
}
}
HuffmanCodes(data, freq, j);
rewind(fp1);
char buffer = 0, bit_count = 0;
while ((ch = fgetc(fp1)) != EOF) {
int len = strlen(codes[ch]);
for (int i = 0; i < len; i++) {
buffer = buffer << 1;
if (codes[ch][i] == '1') {
buffer = buffer | 1;
}
bit_count++;
if (bit_count == 8) {
fwrite(&buffer, 1, 1, fp2);
buffer = 0;
bit_count = 0;
}
}
}
if (bit_count != 0) {
buffer = buffer << (8 - bit_count);
fwrite(&buffer, 1, 1, fp2);
}
fclose(fp1);
fclose(fp2);
}
/* Program entry point: compress test.txt into test.huff. */
int main()
{
    char* source_path = "test.txt";
    char* target_path = "test.huff";

    compress(source_path, target_path);
    return 0;
}
```
其中,compress函数实现了文件压缩的过程。在压缩过程中,先统计字符频率,再根据字符频率构建哈夫曼树,生成每个字符对应的哈夫曼编码,最后将每个字符替换为其对应的哈夫曼编码,写入一个压缩后的文件中。
阅读全文