写一段用huffman实现文件压缩和解压缩的代码
时间: 2024-03-14 20:45:43 浏览: 66
一个用huffman算法实现的 压缩源代码
好的,下面是一个使用 Huffman 编码实现文件压缩和解压缩的示例代码:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_HT 100
#define MAX_CHAR 256
// Huffman树节点结构体
struct MinHeapNode {
char data;
unsigned freq;
struct MinHeapNode *left, *right;
};
// Huffman编码表结构体
struct HuffmanCode {
char c;
char *code;
};
// Huffman编码表
struct HuffmanCode huffmanCodes[MAX_CHAR];
// Huffman树节点最小堆结构体
struct MinHeap {
unsigned size;
unsigned capacity;
struct MinHeapNode **array;
};
// 创建一个新的Huffman树节点
struct MinHeapNode *newNode(char data, unsigned freq) {
struct MinHeapNode *temp = (struct MinHeapNode *) malloc(sizeof(struct MinHeapNode));
temp->left = temp->right = NULL;
temp->data = data;
temp->freq = freq;
return temp;
}
// 创建一个空的最小堆
struct MinHeap *createMinHeap(unsigned capacity) {
struct MinHeap *minHeap = (struct MinHeap *) malloc(sizeof(struct MinHeap));
minHeap->size = 0;
minHeap->capacity = capacity;
minHeap->array = (struct MinHeapNode **) malloc(minHeap->capacity * sizeof(struct MinHeapNode *));
return minHeap;
}
// 交换两个Huffman树节点
void swapMinHeapNode(struct MinHeapNode **a, struct MinHeapNode **b) {
struct MinHeapNode *t = *a;
*a = *b;
*b = t;
}
// 最小堆的堆化操作
void minHeapify(struct MinHeap *minHeap, int idx) {
int smallest = idx;
int left = 2 * idx + 1;
int right = 2 * idx + 2;
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) {
smallest = left;
}
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) {
smallest = right;
}
if (smallest != idx) {
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
minHeapify(minHeap, smallest);
}
}
// 判断最小堆是否只有一个节点
int isSizeOne(struct MinHeap *minHeap) {
return (minHeap->size == 1);
}
// 从最小堆中取出频率最小的节点
struct MinHeapNode *extractMin(struct MinHeap *minHeap) {
struct MinHeapNode *temp = minHeap->array[0];
minHeap->array[0] = minHeap->array[minHeap->size - 1];
--minHeap->size;
minHeapify(minHeap, 0);
return temp;
}
// 插入一个新的Huffman树节点到最小堆中
void insertMinHeap(struct MinHeap *minHeap, struct MinHeapNode *minHeapNode) {
++minHeap->size;
int i = minHeap->size - 1;
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
minHeap->array[i] = minHeap->array[(i - 1) / 2];
i = (i - 1) / 2;
}
minHeap->array[i] = minHeapNode;
}
// 构建Huffman树
struct MinHeapNode *buildHuffmanTree(char data[], int freq[], int size) {
struct MinHeapNode *left, *right, *top;
struct MinHeap *minHeap = createMinHeap(size);
for (int i = 0; i < size; ++i) {
insertMinHeap(minHeap, newNode(data[i], freq[i]));
}
while (!isSizeOne(minHeap)) {
left = extractMin(minHeap);
right = extractMin(minHeap);
top = newNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
return extractMin(minHeap);
}
// 将Huffman编码表写入文件
void writeHuffmanCodesToFile(FILE *fp, struct MinHeapNode *root, char *code, int index) {
if (root == NULL) {
return;
}
if (root->data != '$') {
huffmanCodes[root->data].c = root->data;
huffmanCodes[root->data].code = (char *) malloc((index + 1) * sizeof(char));
strcpy(huffmanCodes[root->data].code, code);
fprintf(fp, "%c%s\n", root->data, code);
}
writeHuffmanCodesToFile(fp, root->left, strcat(code, "0"), index + 1);
code[index] = '\0';
writeHuffmanCodesToFile(fp, root->right, strcat(code, "1"), index + 1);
code[index] = '\0';
}
// 从文件中读取Huffman编码表
void readHuffmanCodesFromFile(FILE *fp) {
char line[MAX_TREE_HT];
while (fgets(line, MAX_TREE_HT, fp)) {
char c = line[0];
char *code = strtok(line + 1, "\n");
huffmanCodes[c].c = c;
huffmanCodes[c].code = (char *) malloc((strlen(code) + 1) * sizeof(char));
strcpy(huffmanCodes[c].code, code);
}
}
// 压缩文件
void compressFile(char *sourceFile, char *destinationFile) {
FILE *source = fopen(sourceFile, "rb");
if (!source) {
printf("Error: Failed to open source file.\n");
return;
}
fseek(source, 0, SEEK_END);
unsigned long sourceSize = ftell(source);
fseek(source, 0, SEEK_SET);
char *sourceData = (char *) malloc(sourceSize * sizeof(char));
fread(sourceData, sizeof(char), sourceSize, source);
int freq[MAX_CHAR] = {0};
for (unsigned long i = 0; i < sourceSize; ++i) {
++freq[sourceData[i]];
}
char data[MAX_CHAR];
int size = 0;
for (int i = 0; i < MAX_CHAR; ++i) {
if (freq[i] != 0) {
data[size++] = i;
}
}
struct MinHeapNode *root = buildHuffmanTree(data, freq, size);
FILE *dest = fopen(destinationFile, "wb");
if (!dest) {
printf("Error: Failed to create destination file.\n");
return;
}
char code[MAX_TREE_HT] = "";
writeHuffmanCodesToFile(dest, root, code, 0);
int bitIndex = 0;
unsigned char currentByte = 0;
for (unsigned long i = 0; i < sourceSize; ++i) {
char c = sourceData[i];
char *huffmanCode = huffmanCodes[c].code;
int len = strlen(huffmanCode);
for (int j = 0; j < len; ++j) {
if (huffmanCode[j] == '1') {
currentByte |= (1 << (7 - bitIndex));
}
if (++bitIndex == 8) {
fwrite(¤tByte, sizeof(unsigned char), 1, dest);
bitIndex = 0;
currentByte = 0;
}
else if (i == sourceSize - 1 && j == len - 1) {
fwrite(¤tByte, sizeof(unsigned char), 1, dest);
}
}
}
fclose(source);
fclose(dest);
printf("File compressed successfully.\n");
}
// 解压文件
void decompressFile(char *sourceFile, char *destinationFile) {
FILE *source = fopen(sourceFile, "rb");
if (!source) {
printf("Error: Failed to open source file.\n");
return;
}
fseek(source, 0, SEEK_END);
unsigned long sourceSize = ftell(source);
fseek(source, 0, SEEK_SET);
char *sourceData = (char *) malloc(sourceSize * sizeof(char));
fread(sourceData, sizeof(char), sourceSize, source);
int freq[MAX_CHAR] = {0};
int pos = 0;
while (sourceData[pos] != '\n') {
char c = sourceData[pos++];
char *code = strtok(sourceData + pos, "\n");
huffmanCodes[c].c = c;
huffmanCodes[c].code = (char *) malloc((strlen(code) + 1) * sizeof(char));
strcpy(huffmanCodes[c].code, code);
pos += strlen(code) + 1;
}
FILE *dest = fopen(destinationFile, "wb");
if (!dest) {
printf("Error: Failed to create destination file.\n");
return;
}
struct MinHeapNode *root = buildHuffmanTree(NULL, freq, 0);
int bitIndex = 0;
unsigned char currentByte = 0;
for (unsigned long i = pos; i < sourceSize; ++i) {
unsigned char byte = sourceData[i];
for (int j = 7; j >= 0; --j) {
if (byte & (1 << j)) {
root = root->right;
}
else {
root = root->left;
}
if (root->left == NULL && root->right == NULL) {
fwrite(&root->data, sizeof(char), 1, dest);
root = buildHuffmanTree(NULL, freq, 0);
}
}
}
fclose(source);
fclose(dest);
printf("File decompressed successfully.\n");
}
int main() {
compressFile("source.txt", "compressed.bin");
decompressFile("compressed.bin", "decompressed.txt");
return 0;
}
```
上述代码中,使用了一个最小堆来构建 Huffman 树,并生成 Huffman 编码表。压缩文件时,先将文件数据读入内存,统计每个字符出现的频率,然后构建 Huffman 树并生成 Huffman 编码表,最后将编码后的数据写入到压缩文件中。解压文件时,从压缩文件中读取 Huffman 编码表和编码后的数据,然后根据 Huffman 编码表进行解码,将解码后的数据写入到解压文件中。
需要注意的是,在压缩文件时,因为是按位写入数据,所以需要特别注意写入最后一个字节时的边界问题。
希望能够帮助到您!
阅读全文