写代码使得输入字符串,输出字符串的哈夫曼编码;输入文本文件(英文),输出哈夫曼编码文件
时间: 2023-06-27 16:02:02 浏览: 115
【C++】根据输入的字符串生成哈夫曼树, 并进行哈夫曼编码和解码
5星 · 资源好评率100%
以下是 Python 代码实现:
```python
import heapq
import os
class HuffmanNode:
def __init__(self, char=None, freq=0, left=None, right=None):
self.char = char
self.freq = freq
self.left = left
self.right = right
def __lt__(self, other):
return self.freq < other.freq
def is_leaf_node(self):
return not self.left and not self.right
def build_frequency_table(text):
freq_table = {}
for char in text:
if char in freq_table:
freq_table[char] += 1
else:
freq_table[char] = 1
return freq_table
def build_huffman_tree(freq_table):
heap = []
for char, freq in freq_table.items():
heapq.heappush(heap, HuffmanNode(char, freq))
while len(heap) > 1:
left_node = heapq.heappop(heap)
right_node = heapq.heappop(heap)
parent_node = HuffmanNode(freq=left_node.freq + right_node.freq, left=left_node, right=right_node)
heapq.heappush(heap, parent_node)
return heap[0]
def build_code_table(huffman_tree):
code_table = {}
def traverse(node, code):
if node.is_leaf_node():
code_table[node.char] = code
else:
traverse(node.left, code + '0')
traverse(node.right, code + '1')
traverse(huffman_tree, '')
return code_table
def encode_text(text, code_table):
encoded_text = ''
for char in text:
encoded_text += code_table[char]
return encoded_text
def encode_file(input_file_path, output_file_path):
with open(input_file_path, 'r') as input_file:
text = input_file.read().replace('\n', '')
freq_table = build_frequency_table(text)
huffman_tree = build_huffman_tree(freq_table)
code_table = build_code_table(huffman_tree)
encoded_text = encode_text(text, code_table)
padding_length = 8 - len(encoded_text) % 8
padding = '0' * padding_length
encoded_text_with_padding = padding + encoded_text + padding_length * '1'
with open(output_file_path, 'wb') as output_file:
for i in range(0, len(encoded_text_with_padding), 8):
byte = encoded_text_with_padding[i:i+8]
output_file.write(bytes([int(byte, 2)]))
def decode_file(input_file_path, output_file_path):
with open(input_file_path, 'rb') as input_file:
byte = input_file.read(1)
encoded_text = ''
while byte != b'':
bits = bin(ord(byte))[2:].rjust(8, '0')
encoded_text += bits
byte = input_file.read(1)
padding_length = int(encoded_text[-8:], 2)
encoded_text = encoded_text[:-8-padding_length]
code_table = build_code_table(build_huffman_tree(build_frequency_table(encoded_text)))
decoded_text = ''
code = ''
for bit in encoded_text:
code += bit
if code in code_table:
decoded_text += code_table[code]
code = ''
with open(output_file_path, 'w') as output_file:
output_file.write(decoded_text)
```
使用方法:
```python
# 编码字符串
text = 'hello world'
freq_table = build_frequency_table(text)
huffman_tree = build_huffman_tree(freq_table)
code_table = build_code_table(huffman_tree)
encoded_text = encode_text(text, code_table)
print(encoded_text)
# 编码文件
encode_file('input.txt', 'output.bin')
# 解码文件
decode_file('output.bin', 'output.txt')
```
阅读全文