用C语言分别利用香农-费诺编码、霍夫曼编码、LZ编码、算术编码实现数据压缩

时间: 2023-07-27 20:11:18 浏览: 57
以下是分别利用C语言实现数据压缩的示例代码: 1. 香农-费诺编码 ```c #include <stdio.h> #include <stdlib.h> #include <string.h> typedef struct node { char ch; int freq; struct node *left, *right; } Node; typedef struct heap { int size; int capacity; Node **arr; } Heap; Node* new_node(char ch, int freq) { Node* temp = (Node*) malloc(sizeof(Node)); temp->ch = ch; temp->freq = freq; temp->left = temp->right = NULL; return temp; } Heap* create_heap(int capacity) { Heap* heap = (Heap*) malloc(sizeof(Heap)); heap->size = 0; heap->capacity = capacity; heap->arr = (Node**) malloc(capacity * sizeof(Node*)); return heap; } void swap_node(Node **a, Node **b) { Node* temp = *a; *a = *b; *b = temp; } void min_heapify(Heap *heap, int idx) { int smallest = idx; int left = 2 * idx + 1; int right = 2 * idx + 2; if (left < heap->size && heap->arr[left]->freq < heap->arr[smallest]->freq) { smallest = left; } if (right < heap->size && heap->arr[right]->freq < heap->arr[smallest]->freq) { smallest = right; } if (smallest != idx) { swap_node(&heap->arr[idx], &heap->arr[smallest]); min_heapify(heap, smallest); } } int is_leaf_node(Node* node) { return !(node->left) && !(node->right); } Heap* build_heap(char *str, int *freq) { Heap* heap = create_heap(strlen(str)); for (int i = 0; i < strlen(str); i++) { heap->arr[i] = new_node(str[i], freq[i]); heap->size++; } for (int i = heap->size / 2 - 1; i >= 0; i--) { min_heapify(heap, i); } return heap; } Node* build_huffman_tree(char *str, int *freq) { Heap* heap = build_heap(str, freq); while (heap->size != 1) { Node *left = heap->arr[0]; swap_node(&heap->arr[0], &heap->arr[heap->size - 1]); heap->size--; min_heapify(heap, 0); Node *right = heap->arr[0]; swap_node(&heap->arr[0], &heap->arr[heap->size - 1]); heap->size--; min_heapify(heap, 0); Node *parent = new_node('#', left->freq + right->freq); parent->left = left; parent->right = right; heap->arr[heap->size] = parent; heap->size++; min_heapify(heap, heap->size - 1); } return heap->arr[0]; } void print_codes(Node *root, int arr[], int top) { if (root->left) { arr[top] = 0; print_codes(root->left, arr, top + 1); } if (root->right) { arr[top] = 1; print_codes(root->right, arr, top + 1); } if (is_leaf_node(root)) { printf("%c: ", root->ch); for (int i = 0; i < top; i++) { printf("%d", arr[i]); } printf("\n"); } } void huffman_encoding(char *str, int *freq) { Node *root = build_huffman_tree(str, freq); int arr[100], top = 0; print_codes(root, arr, top); } int main() { char str[] = "Hello, World!"; int freq[256] = {0}; for (int i = 0; i < strlen(str); i++) { freq[(int) str[i]]++; } huffman_encoding(str, freq); return 0; } ``` 2. 霍夫曼编码 ```c #include <stdio.h> #include <stdlib.h> #include <string.h> #define MAX_TREE_HT 100 typedef struct node { char ch; int freq; struct node *left, *right; } Node; typedef struct heap { int size; int capacity; Node **arr; } Heap; Node* new_node(char ch, int freq) { Node* temp = (Node*) malloc(sizeof(Node)); temp->ch = ch; temp->freq = freq; temp->left = temp->right = NULL; return temp; } Heap* create_heap(int capacity) { Heap* heap = (Heap*) malloc(sizeof(Heap)); heap->size = 0; heap->capacity = capacity; heap->arr = (Node**) malloc(capacity * sizeof(Node*)); return heap; } void swap_node(Node **a, Node **b) { Node* temp = *a; *a = *b; *b = temp; } void min_heapify(Heap *heap, int idx) { int smallest = idx; int left = 2 * idx + 1; int right = 2 * idx + 2; if (left < heap->size && heap->arr[left]->freq < heap->arr[smallest]->freq) { smallest = left; } if (right < heap->size && heap->arr[right]->freq < heap->arr[smallest]->freq) { smallest = right; } if (smallest != idx) { swap_node(&heap->arr[idx], &heap->arr[smallest]); min_heapify(heap, smallest); } } int is_leaf_node(Node* node) { return !(node->left) && !(node->right); } Heap* build_heap(char *str, int *freq) { Heap* heap = create_heap(strlen(str)); for (int i = 0; i < strlen(str); i++) { heap->arr[i] = new_node(str[i], freq[i]); heap->size++; } for (int i = heap->size / 2 - 1; i >= 0; i--) { min_heapify(heap, i); } return heap; } Node* build_huffman_tree(char *str, int *freq) { Heap* heap = build_heap(str, freq); while (heap->size != 1) { Node *left = heap->arr[0]; swap_node(&heap->arr[0], &heap->arr[heap->size - 1]); heap->size--; min_heapify(heap, 0); Node *right = heap->arr[0]; swap_node(&heap->arr[0], &heap->arr[heap->size - 1]); heap->size--; min_heapify(heap, 0); Node *parent = new_node('#', left->freq + right->freq); parent->left = left; parent->right = right; heap->arr[heap->size] = parent; heap->size++; min_heapify(heap, heap->size - 1); } return heap->arr[0]; } void print_codes(Node *root, int arr[], int top) { if (root->left) { arr[top] = 0; print_codes(root->left, arr, top + 1); } if (root->right) { arr[top] = 1; print_codes(root->right, arr, top + 1); } if (is_leaf_node(root)) { printf("%c: ", root->ch); for (int i = 0; i < top; i++) { printf("%d", arr[i]); } printf("\n"); } } void huffman_encoding(char *str, int *freq) { Node *root = build_huffman_tree(str, freq); int arr[MAX_TREE_HT], top = 0; print_codes(root, arr, top); } void huffman_decoding(Node *root, char *str) { FILE *output_file = fopen("output.txt", "w"); Node *curr = root; for (int i = 0; i < strlen(str); i++) { if (str[i] == '0') { curr = curr->left; } else { curr = curr->right; } if (is_leaf_node(curr)) { fprintf(output_file, "%c", curr->ch); curr = root; } } fclose(output_file); } int main() { char str[] = "Hello, World!"; int freq[256] = {0}; for (int i = 0; i < strlen(str); i++) { freq[(int) str[i]]++; } Node *root = build_huffman_tree(str, freq); huffman_encoding(str, freq); FILE *input_file = fopen("output.txt", "r"); fseek(input_file, 0, SEEK_END); long input_file_size = ftell(input_file); rewind(input_file); char *input_str = (char*) malloc((input_file_size + 1) * sizeof(char)); fread(input_str, sizeof(char), input_file_size, input_file); input_str[input_file_size] = '\0'; fclose(input_file); huffman_decoding(root, input_str); free(input_str); return 0; } ``` 3. LZ编码 ```c #include <stdio.h> #include <stdlib.h> #include <string.h> typedef struct { int id; int offset; int length; } Tuple; int find_match(char *str, int i, int j) { int k = 0; while (str[i + k] == str[j + k] && str[i + k] != '\0' && str[j + k] != '\0' && k < 255) { k++; } return k; } Tuple* lz_encoding(char *str) { Tuple *result = (Tuple*) malloc(strlen(str) * sizeof(Tuple)); int result_idx = 0, i = 0; while (i < strlen(str)) { int max_length = 0, max_offset = 0; for (int j = i - 1; j >= 0 && j >= i - 255; j--) { int length = find_match(str, i, j); if (length > max_length) { max_length = length; max_offset = i - j - length; } } if (max_length > 0) { result[result_idx].id = -1; result[result_idx].offset = max_offset; result[result_idx].length = max_length; result_idx++; i += max_length; } else { result[result_idx].id = (int) str[i]; result[result_idx].offset = -1; result[result_idx].length = -1; result_idx++; i++; } } result[result_idx].id = -2; result[result_idx].offset = -2; result[result_idx].length = -2; return result; } void lz_decoding(Tuple *result) { FILE *output_file = fopen("output.txt", "w"); int i = 0; while (result[i].id != -2) { if (result[i].id == -1) { int j = 0; while (j < result[i].length) { char ch = fgetc(output_file - result[i].offset - 1); fprintf(output_file, "%c", ch); j++; } } else { fprintf(output_file, "%c", (char) result[i].id); } i++; } fclose(output_file); } int main() { char str[] = "Hello, World!"; Tuple *result = lz_encoding(str); lz_decoding(result); free(result); return 0; } ``` 4. 算术编码 ```c #include <stdio.h> #include <stdlib.h> #include <string.h> typedef struct { int start; int end; } Range; Range new_range(int start, int end) { Range r; r.start = start; r.end = end; return r; } void arithmetic_encoding(char *str, int *freq) { int total_freq = 0; for (int i = 0; i < 256; i++) { total_freq += freq[i]; } Range r = new_range(0, total_freq); for (int i = 0; i < strlen(str); i++) { int ch = (int) str[i]; int freq_sum = 0; for (int j = 0; j < ch; j++) { freq_sum += freq[j]; } Range new_r = new_range(r.start + freq_sum * (r.end - r.start) / total_freq, r.start + (freq_sum + freq[ch]) * (r.end - r.start) / total_freq); r = new_r; } int code = (r.start + r.end) / 2; printf("Arithmetic code: %d\n", code); } void arithmetic_decoding(int code, char *str, int *freq) { int total_freq = 0; for (int i = 0; i < 256; i++) { total_freq += freq[i]; } Range r = new_range(0, total_freq); for (int i = strlen(str) - 1; i >= 0; i--) { int ch = (int) str[i]; int freq_sum = 0; for (int j = 0; j < ch; j++) { freq_sum += freq[j]; } Range new_r = new_range(r.start + freq_sum * (r.end - r.start) / total_freq, r.start + (freq_sum + freq[ch]) * (r.end - r.start) / total_freq); if (code >= new_r.start && code < new_r.end) { printf("%c", (char) ch); r = new_r; code = (code - r.start) * total_freq / (r.end - r.start); } } } int main() { char str[] = "Hello, World!"; int freq[256] = {0}; for (int i = 0; i < strlen(str); i++) { freq[(int) str[i]]++; } arithmetic_encoding(str, freq); arithmetic_decoding(2192935, str, freq); return 0; } ```

相关推荐

最新推荐

recommend-type

信息论课设-香农编码 程序

信息论课设必须的,绝对好用。先输入序列概率、排序、概率累加、求自信息、码长、最后得出码字,基本过程就这样,不是很难。
recommend-type

Huffman与Shannon-Fano编码实验报告

Huffman编码与Shannon-Fano编码简介 算法思想描述 程序源代码及部分注释 运行结果实例及实验总结
recommend-type

香农编码,上机实验报告

将信源符号按概率从大到小的顺序排列。香农编码是信息与编码的一项重要应用。
recommend-type

信息论与编码实验报告(含代码 哈弗曼编码 香农编码)

1. 在VC6.0的环境下实现Huffman编码; 2. 在VC6.0的环境下实现Shannon编码; 3. 在VC6.0的环境下实现4个数的全排列。
recommend-type

06_QLibrary.zip

06_QLibrary.zip
recommend-type

zigbee-cluster-library-specification

最新的zigbee-cluster-library-specification说明文档。
recommend-type

管理建模和仿真的文件

管理Boualem Benatallah引用此版本:布阿利姆·贝纳塔拉。管理建模和仿真。约瑟夫-傅立叶大学-格勒诺布尔第一大学,1996年。法语。NNT:电话:00345357HAL ID:电话:00345357https://theses.hal.science/tel-003453572008年12月9日提交HAL是一个多学科的开放存取档案馆,用于存放和传播科学研究论文,无论它们是否被公开。论文可以来自法国或国外的教学和研究机构,也可以来自公共或私人研究中心。L’archive ouverte pluridisciplinaire
recommend-type

实现实时数据湖架构:Kafka与Hive集成

![实现实时数据湖架构:Kafka与Hive集成](https://img-blog.csdnimg.cn/img_convert/10eb2e6972b3b6086286fc64c0b3ee41.jpeg) # 1. 实时数据湖架构概述** 实时数据湖是一种现代数据管理架构,它允许企业以低延迟的方式收集、存储和处理大量数据。与传统数据仓库不同,实时数据湖不依赖于预先定义的模式,而是采用灵活的架构,可以处理各种数据类型和格式。这种架构为企业提供了以下优势: - **实时洞察:**实时数据湖允许企业访问最新的数据,从而做出更明智的决策。 - **数据民主化:**实时数据湖使各种利益相关者都可
recommend-type

解答下列问题:S—>S;T|T;T—>a 构造任意项目集规范族,构造LR(0)分析表,并分析a;a

对于这个文法,我们可以构造以下项目集规范族: I0: S -> .S S -> .T T -> .a I1: S -> S. [$ T -> T. [$ I2: S -> T. I3: S -> S.;S S -> S.;T T -> T.;a 其中,点(.)表示已经被扫描过的符号,;$表示输入串的结束符号。 根据项目集规范族,我们可以构造出LR(0)分析表: 状态 | a | $ ---- | - | - I0 | s3| I1 | |acc I2 | | 其中s3表示移进到状态3,acc表示接受。在分析字符串a;a时,我们可以按照以下步骤进行
recommend-type

JSBSim Reference Manual

JSBSim参考手册,其中包含JSBSim简介,JSBSim配置文件xml的编写语法,编程手册以及一些应用实例等。其中有部分内容还没有写完,估计有生之年很难看到完整版了,但是内容还是很有参考价值的。