利用C程序对一个文本文件进行霍夫曼编码压缩
时间: 2023-12-20 09:04:48 浏览: 70
霍夫曼编码是一种基于概率的无损数据压缩算法,它可以将出现频率较高的字符用较短的编码表示,而将出现频率较低的字符用较长的编码表示,从而实现压缩效果。
以下是一个基于C语言的霍夫曼编码压缩程序的实现:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_HT 100
typedef struct MinHeapNode {
char data;
unsigned freq;
struct MinHeapNode *left, *right;
} MinHeapNode;
typedef struct MinHeap {
unsigned size;
unsigned capacity;
MinHeapNode **array;
} MinHeap;
typedef struct HuffmanNode {
char data;
char *code;
} HuffmanNode;
typedef struct HuffmanTable {
unsigned size;
unsigned capacity;
HuffmanNode *array;
} HuffmanTable;
MinHeapNode *newMinHeapNode(char data, unsigned freq) {
MinHeapNode *node = (MinHeapNode *) malloc(sizeof(MinHeapNode));
node->left = node->right = NULL;
node->data = data;
node->freq = freq;
return node;
}
MinHeap *createMinHeap(unsigned capacity) {
MinHeap *minHeap = (MinHeap *) malloc(sizeof(MinHeap));
minHeap->size = 0;
minHeap->capacity = capacity;
minHeap->array = (MinHeapNode **) malloc(minHeap->capacity * sizeof(MinHeapNode *));
return minHeap;
}
void swapMinHeapNode(MinHeapNode **a, MinHeapNode **b) {
MinHeapNode *t = *a;
*a = *b;
*b = t;
}
void minHeapify(MinHeap *minHeap, int idx) {
int smallest = idx;
int left = 2 * idx + 1;
int right = 2 * idx + 2;
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) {
smallest = left;
}
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) {
smallest = right;
}
if (smallest != idx) {
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
minHeapify(minHeap, smallest);
}
}
int isSizeOne(MinHeap *minHeap) {
return (minHeap->size == 1);
}
MinHeapNode *extractMin(MinHeap *minHeap) {
MinHeapNode *temp = minHeap->array[0];
minHeap->array[0] = minHeap->array[minHeap->size - 1];
--minHeap->size;
minHeapify(minHeap, 0);
return temp;
}
void insertMinHeap(MinHeap *minHeap, MinHeapNode *minHeapNode) {
++minHeap->size;
int i = minHeap->size - 1;
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
minHeap->array[i] = minHeap->array[(i - 1) / 2];
i = (i - 1) / 2;
}
minHeap->array[i] = minHeapNode;
}
void buildMinHeap(MinHeap *minHeap) {
int n = minHeap->size - 1;
int i;
for (i = (n - 1) / 2; i >= 0; --i) {
minHeapify(minHeap, i);
}
}
void printArray(int arr[], int n) {
int i;
for (i = 0; i < n; ++i) {
printf("%d", arr[i]);
}
printf("\n");
}
int isLeaf(MinHeapNode *root) {
return !(root->left) && !(root->right);
}
HuffmanTable *createHuffmanTable(unsigned capacity) {
HuffmanTable *huffmanTable = (HuffmanTable *) malloc(sizeof(HuffmanTable));
huffmanTable->size = 0;
huffmanTable->capacity = capacity;
huffmanTable->array = (HuffmanNode *) malloc(huffmanTable->capacity * sizeof(HuffmanNode));
return huffmanTable;
}
void addHuffmanNode(HuffmanTable *huffmanTable, char data, char code[]) {
huffmanTable->array[huffmanTable->size].data = data;
huffmanTable->array[huffmanTable->size].code = (char *) malloc((strlen(code) + 1) * sizeof(char));
strcpy(huffmanTable->array[huffmanTable->size].code, code);
huffmanTable->size++;
}
HuffmanTable *buildHuffmanTable(MinHeapNode *root, char code[], int top, HuffmanTable *huffmanTable) {
if (root->left) {
code[top] = '0';
buildHuffmanTable(root->left, code, top + 1, huffmanTable);
}
if (root->right) {
code[top] = '1';
buildHuffmanTable(root->right, code, top + 1, huffmanTable);
}
if (isLeaf(root)) {
addHuffmanNode(huffmanTable, root->data, code);
}
return huffmanTable;
}
void encodeFile(char *fileName, HuffmanTable *huffmanTable) {
FILE *fp = fopen(fileName, "r");
if (fp == NULL) {
printf("Error: Could not open file %s", fileName);
return;
}
char ch;
while ((ch = fgetc(fp)) != EOF) {
for (int i = 0; i < huffmanTable->size; i++) {
if (huffmanTable->array[i].data == ch) {
printf("%s", huffmanTable->array[i].code);
}
}
}
fclose(fp);
}
void decodeFile(MinHeapNode *root, char *fileName) {
FILE *fp = fopen(fileName, "r");
if (fp == NULL) {
printf("Error: Could not open file %s", fileName);
return;
}
MinHeapNode *curr = root;
char ch;
while ((ch = fgetc(fp)) != EOF) {
if (ch == '0') {
curr = curr->left;
} else {
curr = curr->right;
}
if (isLeaf(curr)) {
printf("%c", curr->data);
curr = root;
}
}
fclose(fp);
}
void HuffmanCodes(char *fileName) {
FILE *fp = fopen(fileName, "r");
if (fp == NULL) {
printf("Error: Could not open file %s", fileName);
return;
}
int freq[256] = {0};
char ch;
while ((ch = fgetc(fp)) != EOF) {
freq[ch]++;
}
fclose(fp);
MinHeap *minHeap = createMinHeap(256);
for (int i = 0; i < 256; ++i) {
if (freq[i] != 0) {
insertMinHeap(minHeap, newMinHeapNode(i, freq[i]));
}
}
while (!isSizeOne(minHeap)) {
MinHeapNode *left = extractMin(minHeap);
MinHeapNode *right = extractMin(minHeap);
MinHeapNode *top = newMinHeapNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
MinHeapNode *root = extractMin(minHeap);
char code[MAX_TREE_HT];
HuffmanTable *huffmanTable = createHuffmanTable(256);
buildHuffmanTable(root, code, 0, huffmanTable);
printf("Huffman Codes:\n");
for (int i = 0; i < huffmanTable->size; i++) {
printf("%c: %s\n", huffmanTable->array[i].data, huffmanTable->array[i].code);
}
printf("\nEncoded File:\n");
encodeFile(fileName, huffmanTable);
printf("\n\nDecoded File:\n");
decodeFile(root, "encoded.txt");
}
int main() {
HuffmanCodes("input.txt");
return 0;
}
```
在这个例子中,我们首先统计文件中每个字符出现的频率,并将其存储在一个数组中。然后,我们使用一个最小堆来构建霍夫曼树并生成霍夫曼编码。最后,我们使用这些编码对文件进行压缩,并将压缩后的结果写入一个新文件中。要解压缩文件,我们只需重建原始的霍夫曼树,并使用相同的编码来解码压缩的文件。
请注意,这只是一个基础的实现示例,可能需要进行更多的调整和改进才能用于实际应用。
阅读全文