python实现哈夫曼编码解码具体代码
时间: 2023-07-19 17:33:34 浏览: 117
哈夫曼编码以及解码实现
3星 · 编辑精心推荐
以下是Python实现哈夫曼编码解码的具体代码:
```python
import heapq
from collections import defaultdict
class HuffmanCoding:
def __init__(self, text):
self.text = text
self.freq = defaultdict(int)
self.heap = []
self.codes = {}
self.reverse_mapping = {}
def make_frequency_dict(self):
for char in self.text:
self.freq[char] += 1
def make_heap(self):
for key in self.freq:
heapq.heappush(self.heap, (self.freq[key], key))
def merge_nodes(self):
while len(self.heap) > 1:
min1 = heapq.heappop(self.heap)
min2 = heapq.heappop(self.heap)
merged = (min1[0] + min2[0], min1[1] + min2[1])
for symbol in min1[1]:
self.codes[symbol] = '0' + self.codes.get(symbol, '')
self.reverse_mapping['0' + self.reverse_mapping.get(symbol, '')] = symbol
for symbol in min2[1]:
self.codes[symbol] = '1' + self.codes.get(symbol, '')
self.reverse_mapping['1' + self.reverse_mapping.get(symbol, '')] = symbol
heapq.heappush(self.heap, merged)
def make_codes(self):
root = heapq.heappop(self.heap)
code = ''
if len(root[1]) == 1:
self.codes[root[1]] = '0'
else:
for symbol in root[1]:
self.codes[symbol] = code + self.codes.get(symbol, '')
self.reverse_mapping[code + self.reverse_mapping.get(symbol, '')] = symbol
def get_encoded_text(self):
encoded_text = ''
for char in self.text:
encoded_text += self.codes[char]
return encoded_text
def pad_encoded_text(self, encoded_text):
extra_padding = 8 - len(encoded_text) % 8
for i in range(extra_padding):
encoded_text += '0'
padded_info = "{0:08b}".format(extra_padding)
encoded_text = padded_info + encoded_text
return encoded_text
def get_byte_array(self, padded_encoded_text):
if len(padded_encoded_text) % 8 != 0:
print("Encoded text not padded properly")
exit(0)
b = bytearray()
for i in range(0, len(padded_encoded_text), 8):
byte = padded_encoded_text[i:i + 8]
b.append(int(byte, 2))
return b
def compress(self):
self.make_frequency_dict()
self.make_heap()
self.merge_nodes()
self.make_codes()
encoded_text = self.get_encoded_text()
padded_encoded_text = self.pad_encoded_text(encoded_text)
byte_array = self.get_byte_array(padded_encoded_text)
return byte_array
def remove_padding(self, padded_encoded_text):
padded_info = padded_encoded_text[:8]
extra_padding = int(padded_info, 2)
padded_encoded_text = padded_encoded_text[8:]
encoded_text = padded_encoded_text[:-1 * extra_padding]
return encoded_text
def decode_text(self, encoded_text):
current_code = ""
decoded_text = ""
for bit in encoded_text:
current_code += bit
if current_code in self.reverse_mapping:
character = self.reverse_mapping[current_code]
decoded_text += character
current_code = ""
return decoded_text
def decompress(self, byte_array):
bit_string = ""
for byte in byte_array:
bits = bin(byte)[2:].rjust(8, '0')
bit_string += bits
encoded_text = self.remove_padding(bit_string)
decompressed_text = self.decode_text(encoded_text)
return decompressed_text
```
使用方法:
```python
text = "hello world"
huffmanCoding = HuffmanCoding(text)
compressed = huffmanCoding.compress()
decompressed = huffmanCoding.decompress(compressed)
assert (text == decompressed)
```
其中,`HuffmanCoding` 类实现了哈夫曼编码和解码的逻辑。在 `compress` 方法中,先统计文本中各个字符出现的频率,然后使用堆来合并节点,生成哈夫曼树,接着根据哈夫曼树生成编码,并将编码转换为字节流。在 `decompress` 方法中,首先解析字节流,然后使用哈夫曼编码解码出原始文本。
阅读全文