基于哈夫曼树的文件压缩与解压
时间: 2024-04-05 09:25:57 浏览: 69
用哈夫曼树实现压缩解压.pdf
基于哈夫曼树的文件压缩与解压是一种常见的压缩算法,它通过构建哈夫曼树来实现对文件的压缩和解压。下面是一个基于哈夫曼树的文件压缩与解压的示例代码:
压缩文件:
```python
import heapq
import os
class HuffmanNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other):
return self.freq < other.freq
def build_frequency_table(data):
frequency_table = {}
for char in data:
if char in frequency_table:
frequency_table[char] += 1
else:
frequency_table[char] = 1
return frequency_table
def build_huffman_tree(frequency_table):
heap = []
for char, freq in frequency_table.items():
node = HuffmanNode(char, freq)
heapq.heappush(heap, node)
while len(heap) > 1:
node1 = heapq.heappop(heap)
node2 = heapq.heappop(heap)
merged = HuffmanNode(None, node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heapq.heappush(heap, merged)
return heap[0]
def build_encoding_table(root):
encoding_table = {}
def build_encoding_table_helper(node, code):
if node is None:
return
if node.char is not None:
encoding_table[node.char] = code
build_encoding_table_helper(node.left, code + "0")
build_encoding_table_helper(node.right, code + "1")
build_encoding_table_helper(root, "")
return encoding_table
def compress(data):
frequency_table = build_frequency_table(data)
huffman_tree = build_huffman_tree(frequency_table)
encoding_table = build_encoding_table(huffman_tree)
compressed_data = ""
for char in data:
compressed_data += encoding_table[char]
padding = 8 - len(compressed_data) % 8
compressed_data += padding * "0"
padded_info = "{0:08b}".format(padding)
compressed_data = padded_info + compressed_data
byte_array = bytearray()
for i in range(0, len(compressed_data), 8):
byte = compressed_data[i:i+8]
byte_array.append(int(byte, 2))
return byte_array
def decompress(byte_array):
bit_string = ""
for byte in byte_array:
bits = bin(byte)[2:].rjust(8, "0")
bit_string += bits
padding = int(bit_string[:8], 2)
bit_string = bit_string[8:-padding]
decoding_table = {}
huffman_tree = build_huffman_tree(build_frequency_table(data))
def build_decoding_table_helper(node, code):
if node is None:
return
if node.char is not None:
decoding_table[code] = node.char
build_decoding_table_helper(node.left, code + "0")
build_decoding_table_helper(node.right, code + "1")
build_decoding_table_helper(huffman_tree, "")
current_code = ""
decompressed_data = ""
for bit in bit_string:
current_code += bit
if current_code in decoding_table:
char = decoding_table[current_code]
decompressed_data += char
current_code = ""
return decompressed_data
# 示例用法
data = "hello world"
compressed_data = compress(data)
print("Compressed data:", compressed_data)
decompressed_data = decompress(compressed_data)
print("Decompressed data:", decompressed_data)
```
解压文件:
```python
import heapq
import os
class HuffmanNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other):
return self.freq < other.freq
def build_frequency_table(data):
frequency_table = {}
for char in data:
if char in frequency_table:
frequency_table[char] += 1
else:
frequency_table[char] = 1
return frequency_table
def build_huffman_tree(frequency_table):
heap = []
for char, freq in frequency_table.items():
node = HuffmanNode(char, freq)
heapq.heappush(heap, node)
while len(heap) > 1:
node1 = heapq.heappop(heap)
node2 = heapq.heappop(heap)
merged = HuffmanNode(None, node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heapq.heappush(heap, merged)
return heap[0]
def build_encoding_table(root):
encoding_table = {}
def build_encoding_table_helper(node, code):
if node is None:
return
if node.char is not None:
encoding_table[node.char] = code
build_encoding_table_helper(node.left, code + "0")
build_encoding_table_helper(node.right, code + "1")
build_encoding_table_helper(root, "")
return encoding_table
def compress(data):
frequency_table = build_frequency_table(data)
huffman_tree = build_huffman_tree(frequency_table)
encoding_table = build_encoding_table(huffman_tree)
compressed_data = ""
for char in data:
compressed_data += encoding_table[char]
padding = 8 - len(compressed_data) % 8
compressed_data += padding * "0"
padded_info = "{0:08b}".format(padding)
compressed_data = padded_info + compressed_data
byte_array = bytearray()
for i in range(0, len(compressed_data), 8):
byte = compressed_data[i:i+8]
byte_array.append(int(byte, 2))
return byte_array
def decompress(byte_array):
bit_string = ""
for byte in byte_array:
bits = bin(byte)[2:].rjust(8, "0")
bit_string += bits
padding = int(bit_string[:8], 2)
bit_string = bit_string[8:-padding]
decoding_table = {}
huffman_tree = build_huffman_tree(build_frequency_table(data))
def build_decoding_table_helper(node, code):
if node is None:
return
if node.char is not None:
decoding_table[code] = node.char
build_decoding_table_helper(node.left, code + "0")
build_decoding_table_helper(node.right, code + "1")
build_decoding_table_helper(huffman_tree, "")
current_code = ""
decompressed_data = ""
for bit in bit_string:
current_code += bit
if current_code in decoding_table:
char = decoding_table[current_code]
decompressed_data += char
current_code = ""
return decompressed_data
# 示例用法
compressed_data = bytearray([104, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
阅读全文