哈夫曼编码是一种基于字符出现频率的编码方式,可以用于文件压缩。实现文件压缩的过程可以分为以下几个步骤: 统计文件中每个字符出现的频率。 根据字符出现频率构建哈夫曼树。 根据哈夫曼树构建哈夫曼编码表。 使用哈夫曼编码表将文件内容进行编码。 将编码后的内容写入到压缩文件中。 将哈夫曼树和编码表写入到压缩文件的文件头中。 解压缩的过程可以分为以下几个步骤: 读取压缩文件的文件头,获取哈夫曼树和编码表。 根据哈夫曼树和编码表解码压缩文件的内容。 将解码后的内容写入到解压缩后的文件中。将上述过程用程序写出来
时间: 2024-03-14 07:43:38 浏览: 128
以下是Python代码实现文件压缩和解压缩的过程,包含了统计字符频率、构建哈夫曼树、生成编码表、编码、解码等步骤。其中,文件头中包括哈夫曼树和编码表的信息,使用pickle库进行序列化和反序列化。
```python
import heapq
import os
import pickle
class HuffmanNode:
def __init__(self, freq, char=None, left=None, right=None):
self.freq = freq
self.char = char
self.left = left
self.right = right
def __lt__(self, other):
return self.freq < other.freq
def __eq__(self, other):
return self.freq == other.freq
class HuffmanTree:
def __init__(self):
self.root = None
self.codes = {}
self.reverse_codes = {}
def build_tree(self, freq_table):
heap = []
for char, freq in freq_table.items():
heapq.heappush(heap, HuffmanNode(freq, char))
while len(heap) > 1:
left = heapq.heappop(heap)
right = heapq.heappop(heap)
freq_sum = left.freq + right.freq
node = HuffmanNode(freq_sum, left.char + right.char, left, right)
heapq.heappush(heap, node)
self.root = heapq.heappop(heap)
def generate_codes(self):
def _generate_codes(node, current_code):
if node is None:
return
if node.char is not None:
self.codes[node.char] = current_code
self.reverse_codes[current_code] = node.char
return
_generate_codes(node.left, current_code + '0')
_generate_codes(node.right, current_code + '1')
_generate_codes(self.root, '')
def encode(self, text):
encoded_text = ''
for char in text:
encoded_text += self.codes[char]
return encoded_text
def decode(self, encoded_text):
decoded_text = ''
current_code = ''
for bit in encoded_text:
current_code += bit
if current_code in self.reverse_codes:
char = self.reverse_codes[current_code]
decoded_text += char
current_code = ''
return decoded_text
def get_freq_table(text):
freq_table = {}
for char in text:
if char in freq_table:
freq_table[char] += 1
else:
freq_table[char] = 1
return freq_table
def compress_file(input_file_path, output_file_path):
with open(input_file_path, 'r') as input_file:
text = input_file.read()
freq_table = get_freq_table(text)
tree = HuffmanTree()
tree.build_tree(freq_table)
tree.generate_codes()
encoded_text = tree.encode(text)
with open(output_file_path, 'wb') as output_file:
pickle.dump(tree.root, output_file)
pickle.dump(tree.codes, output_file)
output_file.write(int(encoded_text, 2).to_bytes((len(encoded_text) + 7) // 8, byteorder='big'))
def decompress_file(input_file_path, output_file_path):
with open(input_file_path, 'rb') as input_file:
root = pickle.load(input_file)
codes = pickle.load(input_file)
data = input_file.read()
encoded_text = bin(int.from_bytes(data, byteorder='big'))[2:]
tree = HuffmanTree()
tree.root = root
tree.codes = codes
tree.generate_codes()
decoded_text = tree.decode(encoded_text)
with open(output_file_path, 'w') as output_file:
output_file.write(decoded_text)
input_file_path = 'input.txt'
compressed_file_path = 'compressed.bin'
decompressed_file_path = 'decompressed.txt'
compress_file(input_file_path, compressed_file_path)
decompress_file(compressed_file_path, decompressed_file_path)
print(f'Original file size: {os.path.getsize(input_file_path)} bytes')
print(f'Compressed file size: {os.path.getsize(compressed_file_path)} bytes')
```
注意:上述代码仅供参考,实际应用中还需要考虑一些细节问题,例如在编码过程中如何处理文件末尾不足8位的情况,以及如何处理编码表中出现在文件中未出现的字符等。
阅读全文