在霍夫曼编码中,原始数据数组为大小为 4214,而输入的编码文件大小为 5201,想想看为什么编码后 的数据多了。
时间: 2024-01-24 14:20:24 浏览: 11
霍夫曼编码是一种无损压缩算法,它通过对出现频率较高的字符进行较短的编码,对出现频率较低的字符进行较长的编码,从而实现数据压缩。因此,在原始数据中,出现频率较高的字符被压缩得更小,而出现频率较低的字符被压缩得更大。因此,如果原始数据中包含大量出现频率较低的字符,那么编码后的数据大小可能会比原始数据还要大。另外,编码后的数据可能会包含一些元数据,例如编码表等,这些元数据也会增加编码文件的大小。因此,编码后的数据比原始数据大的情况是可能存在的。
相关问题
利用C程序对一个文本文件进行霍夫曼编码压缩
霍夫曼编码是一种基于概率的无损数据压缩算法,它可以将出现频率较高的字符用较短的编码表示,而将出现频率较低的字符用较长的编码表示,从而实现压缩效果。
以下是一个基于C语言的霍夫曼编码压缩程序的实现:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_HT 100
typedef struct MinHeapNode {
char data;
unsigned freq;
struct MinHeapNode *left, *right;
} MinHeapNode;
typedef struct MinHeap {
unsigned size;
unsigned capacity;
MinHeapNode **array;
} MinHeap;
typedef struct HuffmanNode {
char data;
char *code;
} HuffmanNode;
typedef struct HuffmanTable {
unsigned size;
unsigned capacity;
HuffmanNode *array;
} HuffmanTable;
MinHeapNode *newMinHeapNode(char data, unsigned freq) {
MinHeapNode *node = (MinHeapNode *) malloc(sizeof(MinHeapNode));
node->left = node->right = NULL;
node->data = data;
node->freq = freq;
return node;
}
MinHeap *createMinHeap(unsigned capacity) {
MinHeap *minHeap = (MinHeap *) malloc(sizeof(MinHeap));
minHeap->size = 0;
minHeap->capacity = capacity;
minHeap->array = (MinHeapNode **) malloc(minHeap->capacity * sizeof(MinHeapNode *));
return minHeap;
}
void swapMinHeapNode(MinHeapNode **a, MinHeapNode **b) {
MinHeapNode *t = *a;
*a = *b;
*b = t;
}
void minHeapify(MinHeap *minHeap, int idx) {
int smallest = idx;
int left = 2 * idx + 1;
int right = 2 * idx + 2;
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) {
smallest = left;
}
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) {
smallest = right;
}
if (smallest != idx) {
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
minHeapify(minHeap, smallest);
}
}
int isSizeOne(MinHeap *minHeap) {
return (minHeap->size == 1);
}
MinHeapNode *extractMin(MinHeap *minHeap) {
MinHeapNode *temp = minHeap->array[0];
minHeap->array[0] = minHeap->array[minHeap->size - 1];
--minHeap->size;
minHeapify(minHeap, 0);
return temp;
}
void insertMinHeap(MinHeap *minHeap, MinHeapNode *minHeapNode) {
++minHeap->size;
int i = minHeap->size - 1;
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
minHeap->array[i] = minHeap->array[(i - 1) / 2];
i = (i - 1) / 2;
}
minHeap->array[i] = minHeapNode;
}
void buildMinHeap(MinHeap *minHeap) {
int n = minHeap->size - 1;
int i;
for (i = (n - 1) / 2; i >= 0; --i) {
minHeapify(minHeap, i);
}
}
void printArray(int arr[], int n) {
int i;
for (i = 0; i < n; ++i) {
printf("%d", arr[i]);
}
printf("\n");
}
int isLeaf(MinHeapNode *root) {
return !(root->left) && !(root->right);
}
HuffmanTable *createHuffmanTable(unsigned capacity) {
HuffmanTable *huffmanTable = (HuffmanTable *) malloc(sizeof(HuffmanTable));
huffmanTable->size = 0;
huffmanTable->capacity = capacity;
huffmanTable->array = (HuffmanNode *) malloc(huffmanTable->capacity * sizeof(HuffmanNode));
return huffmanTable;
}
void addHuffmanNode(HuffmanTable *huffmanTable, char data, char code[]) {
huffmanTable->array[huffmanTable->size].data = data;
huffmanTable->array[huffmanTable->size].code = (char *) malloc((strlen(code) + 1) * sizeof(char));
strcpy(huffmanTable->array[huffmanTable->size].code, code);
huffmanTable->size++;
}
HuffmanTable *buildHuffmanTable(MinHeapNode *root, char code[], int top, HuffmanTable *huffmanTable) {
if (root->left) {
code[top] = '0';
buildHuffmanTable(root->left, code, top + 1, huffmanTable);
}
if (root->right) {
code[top] = '1';
buildHuffmanTable(root->right, code, top + 1, huffmanTable);
}
if (isLeaf(root)) {
addHuffmanNode(huffmanTable, root->data, code);
}
return huffmanTable;
}
void encodeFile(char *fileName, HuffmanTable *huffmanTable) {
FILE *fp = fopen(fileName, "r");
if (fp == NULL) {
printf("Error: Could not open file %s", fileName);
return;
}
char ch;
while ((ch = fgetc(fp)) != EOF) {
for (int i = 0; i < huffmanTable->size; i++) {
if (huffmanTable->array[i].data == ch) {
printf("%s", huffmanTable->array[i].code);
}
}
}
fclose(fp);
}
void decodeFile(MinHeapNode *root, char *fileName) {
FILE *fp = fopen(fileName, "r");
if (fp == NULL) {
printf("Error: Could not open file %s", fileName);
return;
}
MinHeapNode *curr = root;
char ch;
while ((ch = fgetc(fp)) != EOF) {
if (ch == '0') {
curr = curr->left;
} else {
curr = curr->right;
}
if (isLeaf(curr)) {
printf("%c", curr->data);
curr = root;
}
}
fclose(fp);
}
void HuffmanCodes(char *fileName) {
FILE *fp = fopen(fileName, "r");
if (fp == NULL) {
printf("Error: Could not open file %s", fileName);
return;
}
int freq[256] = {0};
char ch;
while ((ch = fgetc(fp)) != EOF) {
freq[ch]++;
}
fclose(fp);
MinHeap *minHeap = createMinHeap(256);
for (int i = 0; i < 256; ++i) {
if (freq[i] != 0) {
insertMinHeap(minHeap, newMinHeapNode(i, freq[i]));
}
}
while (!isSizeOne(minHeap)) {
MinHeapNode *left = extractMin(minHeap);
MinHeapNode *right = extractMin(minHeap);
MinHeapNode *top = newMinHeapNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
MinHeapNode *root = extractMin(minHeap);
char code[MAX_TREE_HT];
HuffmanTable *huffmanTable = createHuffmanTable(256);
buildHuffmanTable(root, code, 0, huffmanTable);
printf("Huffman Codes:\n");
for (int i = 0; i < huffmanTable->size; i++) {
printf("%c: %s\n", huffmanTable->array[i].data, huffmanTable->array[i].code);
}
printf("\nEncoded File:\n");
encodeFile(fileName, huffmanTable);
printf("\n\nDecoded File:\n");
decodeFile(root, "encoded.txt");
}
int main() {
HuffmanCodes("input.txt");
return 0;
}
```
在这个例子中,我们首先统计文件中每个字符出现的频率,并将其存储在一个数组中。然后,我们使用一个最小堆来构建霍夫曼树并生成霍夫曼编码。最后,我们使用这些编码对文件进行压缩,并将压缩后的结果写入一个新文件中。要解压缩文件,我们只需重建原始的霍夫曼树,并使用相同的编码来解码压缩的文件。
请注意,这只是一个基础的实现示例,可能需要进行更多的调整和改进才能用于实际应用。
matlab霍夫曼编码函数
Matlab中自带有`huffmandict`和`huffmanenco`两个函数可以用于霍夫曼编码。其中,`huffmandict`用于生成霍夫曼编码的码表,`huffmanenco`用于对数据进行编码。
以下是一个使用`huffmandict`和`huffmanenco`函数进行霍夫曼编码的示例代码:
```matlab
% 原始数据
data = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];
% 计算每个数据出现的频率
freq = [0.0817, 0.0149, 0.0278, 0.0425, 0.127, 0.0223, 0.0202, 0.0609, 0.0697, 0.0015, 0.0077, 0.0402, 0.0241, 0.0675, 0.0751, 0.0193, 0.0009, 0.0599, 0.0633, 0.0906, 0.0276, 0.0098, 0.0236, 0.0015, 0.0197, 0.0007];
% 生成霍夫曼编码的码表
dict = huffmandict(data, freq);
% 对数据进行编码
encoded_data = huffmanenco(data, dict);
% 输出结果
disp('原始数据:');
disp(data);
disp('编码后的数据:');
disp(encoded_data);
```
执行以上代码,将会输出原始数据和编码后的数据。其中,原始数据是一个包含26个大写字母的字符数组,编码后的数据是一个包含0和1的逻辑向量。
需要注意的是,使用`huffmanenco`函数进行编码时,需要先将原始数据转换为一维向量或一维字符数组。如果原始数据是多维数组,则需要使用`reshape`函数将其转换为一维数组。