霍夫曼编码实现字符统计
时间: 2024-02-20 21:35:23 浏览: 100
霍夫曼编码程序(大连海事大学)
可以使用霍夫曼编码来实现字符统计。霍夫曼编码是一种可变长度编码方式,根据字符出现的频率来分配较短的编码给出现频率较高的字符,以此来达到压缩数据的目的。
下面是一个简单的示例代码,实现了字符统计和霍夫曼编码/解码功能:
```python
import heapq
from collections import defaultdict
# 定义节点类
class Node:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
# 定义小于运算符,用于堆排序
def __lt__(self, other):
return self.freq < other.freq
# 统计字符频率
def count_freq(text):
freq_dict = defaultdict(int)
for char in text:
freq_dict[char] += 1
return freq_dict
# 构建霍夫曼树
def build_huffman_tree(freq_dict):
heap = []
for char, freq in freq_dict.items():
node = Node(char, freq)
heapq.heappush(heap, node)
while len(heap) > 1:
left_child = heapq.heappop(heap)
right_child = heapq.heappop(heap)
parent_freq = left_child.freq + right_child.freq
parent = Node(None, parent_freq)
parent.left = left_child
parent.right = right_child
heapq.heappush(heap, parent)
return heapq.heappop(heap)
# 生成霍夫曼编码字典
def build_huffman_codes(node, code, huffman_codes):
if node.char is not None:
huffman_codes[node.char] = code
else:
build_huffman_codes(node.left, code + '0', huffman_codes)
build_huffman_codes(node.right, code + '1', huffman_codes)
# 编码文本
def encode_text(text, huffman_codes):
encoded_text = ''
for char in text:
encoded_text += huffman_codes[char]
return encoded_text
# 解码文本
def decode_text(encoded_text, huffman_tree):
decoded_text = ''
node = huffman_tree
for bit in encoded_text:
if bit == '0':
node = node.left
else:
node = node.right
if node.char is not None:
decoded_text += node.char
node = huffman_tree
return decoded_text
# 示例用法
def main():
text = "hello world"
freq_dict = count_freq(text)
huffman_tree = build_huffman_tree(freq_dict)
huffman_codes = {}
build_huffman_codes(huffman_tree, '', huffman_codes)
encoded_text = encode_text(text, huffman_codes)
decoded_text = decode_text(encoded_text, huffman_tree)
print("原始文本:", text)
print("编码后:", encoded_text)
print("解码后:", decoded_text)
if __name__ == "__main__":
main()
```
这段代码将输入的文本进行字符统计,然后构建霍夫曼树,并生成霍夫曼编码。最后,使用生成的霍夫曼编码对文本进行编码和解码操作。
阅读全文