用C语言分别利用香农-费诺编码、霍夫曼编码、LZ编码、算术编码实现数据压缩
时间: 2023-07-27 16:11:18 浏览: 126
以下是分别利用C语言实现数据压缩的示例代码:
1. 香农-费诺编码
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node {
char ch;
int freq;
struct node *left, *right;
} Node;
typedef struct heap {
int size;
int capacity;
Node **arr;
} Heap;
Node* new_node(char ch, int freq) {
Node* temp = (Node*) malloc(sizeof(Node));
temp->ch = ch;
temp->freq = freq;
temp->left = temp->right = NULL;
return temp;
}
Heap* create_heap(int capacity) {
Heap* heap = (Heap*) malloc(sizeof(Heap));
heap->size = 0;
heap->capacity = capacity;
heap->arr = (Node**) malloc(capacity * sizeof(Node*));
return heap;
}
void swap_node(Node **a, Node **b) {
Node* temp = *a;
*a = *b;
*b = temp;
}
void min_heapify(Heap *heap, int idx) {
int smallest = idx;
int left = 2 * idx + 1;
int right = 2 * idx + 2;
if (left < heap->size && heap->arr[left]->freq < heap->arr[smallest]->freq) {
smallest = left;
}
if (right < heap->size && heap->arr[right]->freq < heap->arr[smallest]->freq) {
smallest = right;
}
if (smallest != idx) {
swap_node(&heap->arr[idx], &heap->arr[smallest]);
min_heapify(heap, smallest);
}
}
int is_leaf_node(Node* node) {
return !(node->left) && !(node->right);
}
Heap* build_heap(char *str, int *freq) {
Heap* heap = create_heap(strlen(str));
for (int i = 0; i < strlen(str); i++) {
heap->arr[i] = new_node(str[i], freq[i]);
heap->size++;
}
for (int i = heap->size / 2 - 1; i >= 0; i--) {
min_heapify(heap, i);
}
return heap;
}
Node* build_huffman_tree(char *str, int *freq) {
Heap* heap = build_heap(str, freq);
while (heap->size != 1) {
Node *left = heap->arr[0];
swap_node(&heap->arr[0], &heap->arr[heap->size - 1]);
heap->size--;
min_heapify(heap, 0);
Node *right = heap->arr[0];
swap_node(&heap->arr[0], &heap->arr[heap->size - 1]);
heap->size--;
min_heapify(heap, 0);
Node *parent = new_node('#', left->freq + right->freq);
parent->left = left;
parent->right = right;
heap->arr[heap->size] = parent;
heap->size++;
min_heapify(heap, heap->size - 1);
}
return heap->arr[0];
}
void print_codes(Node *root, int arr[], int top) {
if (root->left) {
arr[top] = 0;
print_codes(root->left, arr, top + 1);
}
if (root->right) {
arr[top] = 1;
print_codes(root->right, arr, top + 1);
}
if (is_leaf_node(root)) {
printf("%c: ", root->ch);
for (int i = 0; i < top; i++) {
printf("%d", arr[i]);
}
printf("\n");
}
}
void huffman_encoding(char *str, int *freq) {
Node *root = build_huffman_tree(str, freq);
int arr[100], top = 0;
print_codes(root, arr, top);
}
int main() {
char str[] = "Hello, World!";
int freq[256] = {0};
for (int i = 0; i < strlen(str); i++) {
freq[(int) str[i]]++;
}
huffman_encoding(str, freq);
return 0;
}
```
2. 霍夫曼编码
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_HT 100
typedef struct node {
char ch;
int freq;
struct node *left, *right;
} Node;
typedef struct heap {
int size;
int capacity;
Node **arr;
} Heap;
Node* new_node(char ch, int freq) {
Node* temp = (Node*) malloc(sizeof(Node));
temp->ch = ch;
temp->freq = freq;
temp->left = temp->right = NULL;
return temp;
}
Heap* create_heap(int capacity) {
Heap* heap = (Heap*) malloc(sizeof(Heap));
heap->size = 0;
heap->capacity = capacity;
heap->arr = (Node**) malloc(capacity * sizeof(Node*));
return heap;
}
void swap_node(Node **a, Node **b) {
Node* temp = *a;
*a = *b;
*b = temp;
}
void min_heapify(Heap *heap, int idx) {
int smallest = idx;
int left = 2 * idx + 1;
int right = 2 * idx + 2;
if (left < heap->size && heap->arr[left]->freq < heap->arr[smallest]->freq) {
smallest = left;
}
if (right < heap->size && heap->arr[right]->freq < heap->arr[smallest]->freq) {
smallest = right;
}
if (smallest != idx) {
swap_node(&heap->arr[idx], &heap->arr[smallest]);
min_heapify(heap, smallest);
}
}
int is_leaf_node(Node* node) {
return !(node->left) && !(node->right);
}
Heap* build_heap(char *str, int *freq) {
Heap* heap = create_heap(strlen(str));
for (int i = 0; i < strlen(str); i++) {
heap->arr[i] = new_node(str[i], freq[i]);
heap->size++;
}
for (int i = heap->size / 2 - 1; i >= 0; i--) {
min_heapify(heap, i);
}
return heap;
}
Node* build_huffman_tree(char *str, int *freq) {
Heap* heap = build_heap(str, freq);
while (heap->size != 1) {
Node *left = heap->arr[0];
swap_node(&heap->arr[0], &heap->arr[heap->size - 1]);
heap->size--;
min_heapify(heap, 0);
Node *right = heap->arr[0];
swap_node(&heap->arr[0], &heap->arr[heap->size - 1]);
heap->size--;
min_heapify(heap, 0);
Node *parent = new_node('#', left->freq + right->freq);
parent->left = left;
parent->right = right;
heap->arr[heap->size] = parent;
heap->size++;
min_heapify(heap, heap->size - 1);
}
return heap->arr[0];
}
void print_codes(Node *root, int arr[], int top) {
if (root->left) {
arr[top] = 0;
print_codes(root->left, arr, top + 1);
}
if (root->right) {
arr[top] = 1;
print_codes(root->right, arr, top + 1);
}
if (is_leaf_node(root)) {
printf("%c: ", root->ch);
for (int i = 0; i < top; i++) {
printf("%d", arr[i]);
}
printf("\n");
}
}
void huffman_encoding(char *str, int *freq) {
Node *root = build_huffman_tree(str, freq);
int arr[MAX_TREE_HT], top = 0;
print_codes(root, arr, top);
}
void huffman_decoding(Node *root, char *str) {
FILE *output_file = fopen("output.txt", "w");
Node *curr = root;
for (int i = 0; i < strlen(str); i++) {
if (str[i] == '0') {
curr = curr->left;
} else {
curr = curr->right;
}
if (is_leaf_node(curr)) {
fprintf(output_file, "%c", curr->ch);
curr = root;
}
}
fclose(output_file);
}
int main() {
char str[] = "Hello, World!";
int freq[256] = {0};
for (int i = 0; i < strlen(str); i++) {
freq[(int) str[i]]++;
}
Node *root = build_huffman_tree(str, freq);
huffman_encoding(str, freq);
FILE *input_file = fopen("output.txt", "r");
fseek(input_file, 0, SEEK_END);
long input_file_size = ftell(input_file);
rewind(input_file);
char *input_str = (char*) malloc((input_file_size + 1) * sizeof(char));
fread(input_str, sizeof(char), input_file_size, input_file);
input_str[input_file_size] = '\0';
fclose(input_file);
huffman_decoding(root, input_str);
free(input_str);
return 0;
}
```
3. LZ编码
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
int id;
int offset;
int length;
} Tuple;
int find_match(char *str, int i, int j) {
int k = 0;
while (str[i + k] == str[j + k] && str[i + k] != '\0' && str[j + k] != '\0' && k < 255) {
k++;
}
return k;
}
Tuple* lz_encoding(char *str) {
Tuple *result = (Tuple*) malloc(strlen(str) * sizeof(Tuple));
int result_idx = 0, i = 0;
while (i < strlen(str)) {
int max_length = 0, max_offset = 0;
for (int j = i - 1; j >= 0 && j >= i - 255; j--) {
int length = find_match(str, i, j);
if (length > max_length) {
max_length = length;
max_offset = i - j - length;
}
}
if (max_length > 0) {
result[result_idx].id = -1;
result[result_idx].offset = max_offset;
result[result_idx].length = max_length;
result_idx++;
i += max_length;
} else {
result[result_idx].id = (int) str[i];
result[result_idx].offset = -1;
result[result_idx].length = -1;
result_idx++;
i++;
}
}
result[result_idx].id = -2;
result[result_idx].offset = -2;
result[result_idx].length = -2;
return result;
}
void lz_decoding(Tuple *result) {
FILE *output_file = fopen("output.txt", "w");
int i = 0;
while (result[i].id != -2) {
if (result[i].id == -1) {
int j = 0;
while (j < result[i].length) {
char ch = fgetc(output_file - result[i].offset - 1);
fprintf(output_file, "%c", ch);
j++;
}
} else {
fprintf(output_file, "%c", (char) result[i].id);
}
i++;
}
fclose(output_file);
}
int main() {
char str[] = "Hello, World!";
Tuple *result = lz_encoding(str);
lz_decoding(result);
free(result);
return 0;
}
```
4. 算术编码
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
int start;
int end;
} Range;
Range new_range(int start, int end) {
Range r;
r.start = start;
r.end = end;
return r;
}
void arithmetic_encoding(char *str, int *freq) {
int total_freq = 0;
for (int i = 0; i < 256; i++) {
total_freq += freq[i];
}
Range r = new_range(0, total_freq);
for (int i = 0; i < strlen(str); i++) {
int ch = (int) str[i];
int freq_sum = 0;
for (int j = 0; j < ch; j++) {
freq_sum += freq[j];
}
Range new_r = new_range(r.start + freq_sum * (r.end - r.start) / total_freq, r.start + (freq_sum + freq[ch]) * (r.end - r.start) / total_freq);
r = new_r;
}
int code = (r.start + r.end) / 2;
printf("Arithmetic code: %d\n", code);
}
void arithmetic_decoding(int code, char *str, int *freq) {
int total_freq = 0;
for (int i = 0; i < 256; i++) {
total_freq += freq[i];
}
Range r = new_range(0, total_freq);
for (int i = strlen(str) - 1; i >= 0; i--) {
int ch = (int) str[i];
int freq_sum = 0;
for (int j = 0; j < ch; j++) {
freq_sum += freq[j];
}
Range new_r = new_range(r.start + freq_sum * (r.end - r.start) / total_freq, r.start + (freq_sum + freq[ch]) * (r.end - r.start) / total_freq);
if (code >= new_r.start && code < new_r.end) {
printf("%c", (char) ch);
r = new_r;
code = (code - r.start) * total_freq / (r.end - r.start);
}
}
}
int main() {
char str[] = "Hello, World!";
int freq[256] = {0};
for (int i = 0; i < strlen(str); i++) {
freq[(int) str[i]]++;
}
arithmetic_encoding(str, freq);
arithmetic_decoding(2192935, str, freq);
return 0;
}
```
阅读全文