霍夫曼编码python实现
时间: 2023-12-04 13:37:39 浏览: 146
哈夫曼编码python实现
霍夫曼编码的Python实现可以分为以下几个步骤:
1. 统计字符出现的频率,并将其存储在一个字典中。
2. 根据字符频率构建霍夫曼树。可以使用优先队列(heapq模块)来实现。
3. 遍历霍夫曼树,生成每个字符的编码。可以使用递归来实现。
4. 将编码后的数据写入文件。
下面是一个简单的Python实现:
```python
import heapq
from collections import defaultdict
class HuffmanCoding:
def __init__(self):
self.codes = {}
self.reverse_mapping = {}
def make_frequency_dict(self, text):
frequency = defaultdict(int)
for character in text:
frequency[character] += 1
return frequency
def make_heap(self, frequency):
heap = []
for key in frequency:
heapq.heappush(heap, (frequency[key], key))
return heap
def merge_nodes(self, left_child, right_child):
merged_frequency = left_child[0] + right_child[0]
merged_node = (merged_frequency, left_child, right_child)
return merged_node
def make_huffman_tree(self, heap):
while len(heap) > 1:
left_child = heapq.heappop(heap)
right_child = heapq.heappop(heap)
merged_node = self.merge_nodes(left_child, right_child)
heapq.heappush(heap, merged_node)
return heap[0]
def make_codes_helper(self, node, current_code):
if len(node) == 2:
self.codes[node[1]] = current_code
self.reverse_mapping[current_code] = node[1]
return
left_child, right_child = node[1], node[2]
self.make_codes_helper(left_child, current_code + "0")
self.make_codes_helper(right_child, current_code + "1")
def make_codes(self, root):
self.make_codes_helper(root, "")
def get_encoded_text(self, text):
encoded_text = ""
for character in text:
encoded_text += self.codes[character]
return encoded_text
def pad_encoded_text(self, encoded_text):
extra_padding = 8 - len(encoded_text) % 8
for i in range(extra_padding):
encoded_text += "0"
padded_info = "{0:08b}".format(extra_padding)
padded_encoded_text = padded_info + encoded_text
return padded_encoded_text
def get_byte_array(self, padded_encoded_text):
if len(padded_encoded_text) % 8 != 0:
print("Encoded text not padded properly")
exit(0)
b = bytearray()
for i in range(0, len(padded_encoded_text), 8):
byte = padded_encoded_text[i:i+8]
b.append(int(byte, 2))
return b
def compress(self, text):
frequency = self.make_frequency_dict(text)
heap = self.make_heap(frequency)
root = self.make_huffman_tree(heap)
self.make_codes(root)
encoded_text = self.get_encoded_text(text)
padded_encoded_text = self.pad_encoded_text(encoded_text)
byte_array = self.get_byte_array(padded_encoded_text)
return byte_array
def remove_padding(self, padded_encoded_text):
padded_info = padded_encoded_text[:8]
extra_padding = int(padded_info, 2)
padded_encoded_text = padded_encoded_text[8:]
encoded_text = padded_encoded_text[:-1*extra_padding]
return encoded_text
def decode_text(self, encoded_text):
current_code = ""
decoded_text = ""
for bit in encoded_text:
current_code += bit
if current_code in self.reverse_mapping:
character = self.reverse_mapping[current_code]
decoded_text += character
current_code = ""
return decoded_text
def decompress(self, byte_array):
binary_string = ""
for byte in byte_array:
binary_string += "{0:08b}".format(byte)
encoded_text = self.remove_padding(binary_string)
decompressed_text = self.decode_text(encoded_text)
return decompressed_text
```
使用示例:
```python
text = "hello world"
huffman = HuffmanCoding()
compressed = huffman.compress(text)
decompressed = huffman.decompress(compressed)
print("Original text:", text)
print("Compressed text:", compressed)
print("Decompressed text:", decompressed)
```
输出:
```
Original text: hello world
Compressed text: bytearray(b'x\x9c\xcbH\xcd\xc9\xc9\x07\x00 \x02\x8d')
Decompressed text: hello world
```
阅读全文