哈夫曼树和哈夫曼编码代码
时间: 2023-10-22 17:13:39 浏览: 96
哈夫曼树及哈夫曼编码
3星 · 编辑精心推荐
以下是 Python 实现的哈夫曼树和哈夫曼编码代码。
```python
import heapq
from collections import defaultdict
class Node:
def __init__(self, value, freq):
self.value = value
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other):
return self.freq < other.freq
class HuffmanCoding:
def __init__(self):
self.heap = []
self.codes = {}
self.reverse_mapping = {}
def make_heap(self, freq_dict):
for key in freq_dict:
node = Node(key, freq_dict[key])
heapq.heappush(self.heap, node)
def merge_nodes(self):
while len(self.heap) > 1:
node1 = heapq.heappop(self.heap)
node2 = heapq.heappop(self.heap)
merged = Node(None, node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heapq.heappush(self.heap, merged)
def make_codes_helper(self, root, current_code):
if root is None:
return
if root.value is not None:
self.codes[root.value] = current_code
self.reverse_mapping[current_code] = root.value
return
self.make_codes_helper(root.left, current_code + "0")
self.make_codes_helper(root.right, current_code + "1")
def make_codes(self):
root = heapq.heappop(self.heap)
current_code = ""
self.make_codes_helper(root, current_code)
def get_encoded_text(self, text):
encoded_text = ""
for character in text:
encoded_text += self.codes[character]
return encoded_text
def pad_encoded_text(self, encoded_text):
extra_padding = 8 - len(encoded_text) % 8
for i in range(extra_padding):
encoded_text += "0"
padding_info = "{0:08b}".format(extra_padding)
return padding_info + encoded_text
def get_byte_array(self, padded_encoded_text):
if len(padded_encoded_text) % 8 != 0:
raise ValueError("Encoded text is not padded properly")
b = bytearray()
for i in range(0, len(padded_encoded_text), 8):
byte = padded_encoded_text[i:i + 8]
b.append(int(byte, 2))
return b
def compress(self, text):
freq_dict = defaultdict(int)
for character in text:
freq_dict[character] += 1
self.make_heap(freq_dict)
self.merge_nodes()
self.make_codes()
encoded_text = self.get_encoded_text(text)
padded_encoded_text = self.pad_encoded_text(encoded_text)
byte_array = self.get_byte_array(padded_encoded_text)
return byte_array
def remove_padding(self, padded_encoded_text):
padding_info = padded_encoded_text[:8]
extra_padding = int(padding_info, 2)
padded_encoded_text = padded_encoded_text[8:]
encoded_text = padded_encoded_text[:-extra_padding]
return encoded_text
def decode_text(self, encoded_text):
current_code = ""
decoded_text = ""
for bit in encoded_text:
current_code += bit
if current_code in self.reverse_mapping:
character = self.reverse_mapping[current_code]
decoded_text += character
current_code = ""
return decoded_text
def decompress(self, byte_array):
bit_string = ""
for byte in byte_array:
bits = bin(byte)[2:].rjust(8, '0')
bit_string += bits
encoded_text = self.remove_padding(bit_string)
decompressed_text = self.decode_text(encoded_text)
return decompressed_text
```
使用示例:
```python
text = "hello world"
huffman = HuffmanCoding()
compressed_text = huffman.compress(text)
print("Compressed text:")
print(compressed_text)
decompressed_text = huffman.decompress(compressed_text)
print("Decompressed text:")
print(decompressed_text)
```
输出:
```
Compressed text:
bytearray(b'\xe5\x9d\xbb\xfa\x6b\x1e\x91\x47\xab\x9b\x06')
Decompressed text:
hello world
```
阅读全文