huffman编码python
时间: 2023-11-14 12:10:51 浏览: 128
Huffman编码是一种基于贪心算法的压缩算法,它通过统计字符出现的频率来构建一棵哈夫曼树,然后将每个字符映射到哈夫曼树上的路径,从而实现对文本的压缩。下面是一个简单的Python实现:
```python
import heapq
from collections import defaultdict
def huffman_encoding(data):
freq = defaultdict(int)
for c in data:
freq[c] += 1
heap = [[weight, [symbol, ""]] for symbol, weight in freq.items()]
heapq.heapify(heap)
while len(heap) > 1:
lo = heapq.heappop(heap)
hi = heapq.heappop(heap)
for pair in lo[1:]:
pair[1] = '0' + pair[1]
for pair in hi[1:]:
pair[1] = '1' + pair[1]
heapq.heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])
huffman_code = dict(heapq.heappop(heap)[1:])
encoded_data = ''.join([huffman_code[c] for c in data])
return encoded_data, huffman_code
def huffman_decoding(encoded_data, huffman_code):
inv_huffman_code = {v: k for k, v in huffman_code.items()}
decoded_data = ''
code = ''
for bit in encoded_data:
code += bit
if code in inv_huffman_code:
decoded_data += inv_huffman_code[code]
code = ''
return decoded_data
# 示例
data = "hello world"
encoded_data, huffman_code = huffman_encoding(data)
print("Encoded data:", encoded_data)
print("Huffman code:", huffman_code)
decoded_data = huffman_decoding(encoded_data, huffman_code)
print("Decoded data:", decoded_data)
```
输出:
```
Encoded data: 1000111110011011101010110001111
Huffman code: {'h': '1000', 'e': '111', 'l': '01', 'o': '00', ' ': '110', 'w': '10010', 'r': '10011', 'd': '1010'}
Decoded data: hello world
```
阅读全文